Merge branch 'perf/urgent' into perf/core, to pick up fixes
author Ingo Molnar <mingo@kernel.org>
Wed, 11 May 2016 14:56:38 +0000 (16:56 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 11 May 2016 14:56:38 +0000 (16:56 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
201 files changed:
Documentation/sysctl/kernel.txt
arch/arm/kernel/hw_breakpoint.c
arch/arm/kernel/perf_callchain.c
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/perf_callchain.c
arch/metag/kernel/perf_callchain.c
arch/mips/kernel/perf_event.c
arch/powerpc/perf/callchain.c
arch/sparc/kernel/perf_event.c
arch/x86/Kconfig
arch/x86/events/Kconfig [new file with mode: 0644]
arch/x86/events/Makefile
arch/x86/events/amd/uncore.c
arch/x86/events/core.c
arch/x86/events/intel/Makefile [new file with mode: 0644]
arch/x86/events/intel/bts.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/pt.c
arch/x86/events/intel/pt.h
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/events/msr.c
arch/x86/events/perf_event.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/msr-index.h
arch/x86/kernel/uprobes.c
arch/xtensa/kernel/perf_event.c
drivers/perf/arm_pmu.c
drivers/powercap/intel_rapl.c
include/linux/perf_event.h
include/uapi/linux/perf_event.h
kernel/bpf/stackmap.c
kernel/events/callchain.c
kernel/events/core.c
kernel/events/internal.h
kernel/events/ring_buffer.c
kernel/sysctl.c
kernel/trace/trace_event_perf.c
tools/Makefile
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-all.c
tools/build/feature/test-bpf.c
tools/build/feature/test-dwarf_getlocations.c [new file with mode: 0644]
tools/lib/api/fs/fs.c
tools/lib/api/fs/fs.h
tools/perf/Documentation/intel-pt.txt
tools/perf/Documentation/itrace.txt
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-diff.txt
tools/perf/Documentation/perf-list.txt
tools/perf/Documentation/perf-mem.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/Makefile.perf
tools/perf/arch/powerpc/Makefile
tools/perf/arch/powerpc/util/dwarf-regs.c
tools/perf/arch/powerpc/util/sym-handling.c
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl [new file with mode: 0644]
tools/perf/arch/x86/entry/syscalls/syscalltbl.sh [new file with mode: 0755]
tools/perf/arch/x86/tests/perf-time-to-tsc.c
tools/perf/arch/x86/util/dwarf-regs.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/tsc.c
tools/perf/arch/x86/util/tsc.h [deleted file]
tools/perf/bench/futex-lock-pi.c
tools/perf/bench/futex.h
tools/perf/bench/mem-functions.c
tools/perf/builtin-config.c
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/config/Makefile
tools/perf/jvmti/jvmti_agent.c
tools/perf/perf.c
tools/perf/perf.h
tools/perf/scripts/python/export-to-postgresql.py
tools/perf/tests/Build
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/code-reading.c
tools/perf/tests/dso-data.c
tools/perf/tests/event-times.c [new file with mode: 0644]
tools/perf/tests/event_update.c
tools/perf/tests/hists_common.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/openat-syscall-tp-fields.c
tools/perf/tests/perf-record.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/tests.h
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/trace/beauty/eventfd.c [new file with mode: 0644]
tools/perf/trace/beauty/futex_op.c [new file with mode: 0644]
tools/perf/trace/beauty/mmap.c [new file with mode: 0644]
tools/perf/trace/beauty/mode_t.c [new file with mode: 0644]
tools/perf/trace/beauty/msg_flags.c [new file with mode: 0644]
tools/perf/trace/beauty/open_flags.c [new file with mode: 0644]
tools/perf/trace/beauty/perf_event_open.c [new file with mode: 0644]
tools/perf/trace/beauty/pid.c [new file with mode: 0644]
tools/perf/trace/beauty/sched_policy.c [new file with mode: 0644]
tools/perf/trace/beauty/signum.c [new file with mode: 0644]
tools/perf/trace/beauty/socket_type.c [new file with mode: 0644]
tools/perf/trace/beauty/waitid_options.c [new file with mode: 0644]
tools/perf/ui/browsers/hists.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/hist.c
tools/perf/ui/stdio/hist.c
tools/perf/util/Build
tools/perf/util/annotate.c
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-loader.c
tools/perf/util/bpf-loader.h
tools/perf/util/build-id.c
tools/perf/util/call-path.c [new file with mode: 0644]
tools/perf/util/call-path.h [new file with mode: 0644]
tools/perf/util/callchain.c
tools/perf/util/callchain.h
tools/perf/util/config.c
tools/perf/util/config.h [new file with mode: 0644]
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/data.c
tools/perf/util/data.h
tools/perf/util/db-export.c
tools/perf/util/db-export.h
tools/perf/util/dwarf-aux.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/evsel_fprintf.c [new file with mode: 0644]
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/intel-bts.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
tools/perf/util/intel-pt.c
tools/perf/util/jitdump.c
tools/perf/util/jitdump.h
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/map.c
tools/perf/util/ordered-events.c
tools/perf/util/ordered-events.h
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-file.c
tools/perf/util/probe-finder.c
tools/perf/util/python-ext-sources
tools/perf/util/rb_resort.h [new file with mode: 0644]
tools/perf/util/record.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/symbol_fprintf.c [new file with mode: 0644]
tools/perf/util/syscalltbl.c [new file with mode: 0644]
tools/perf/util/syscalltbl.h [new file with mode: 0644]
tools/perf/util/thread-stack.c
tools/perf/util/thread-stack.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/thread_map.c
tools/perf/util/thread_map.h
tools/perf/util/tool.h
tools/perf/util/trigger.h [new file with mode: 0644]
tools/perf/util/tsc.h
tools/perf/util/unwind-libunwind.c
tools/perf/util/util.c
tools/perf/util/util.h

index fcddfd5ded999a3ac8d5624f150439c402e8f0ed..daabdd7ee543ea7f8bfe61274869ded73f353a94 100644 (file)
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
 - panic_on_warn
 - perf_cpu_time_max_percent
 - perf_event_paranoid
+- perf_event_max_stack
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN).  The default value is 2.
 
 ==============================================================
 
+perf_event_max_stack:
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
index 6284779d64ee6394dc11b38cc32e24cefac276b1..b8df45883cf78e36aab24522d0da348335801695 100644 (file)
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
        info->address &= ~alignment_mask;
        info->ctrl.len <<= offset;
 
-       if (!bp->overflow_handler) {
+       if (is_default_overflow_handler(bp)) {
                /*
                 * Mismatch breakpoints are required for single-stepping
                 * breakpoints.
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
                 * mismatch breakpoint so we can single-step over the
                 * watchpoint trigger.
                 */
-               if (!wp->overflow_handler)
+               if (is_default_overflow_handler(wp))
                        enable_single_step(wp, instruction_pointer(regs));
 
 unlock:
index 4e02ae5950ff6463e4da472066b55ceef9a970be..27563befa8a2df5b27ea39b56bde584771945dfd 100644 (file)
@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-       while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+       while ((entry->nr < sysctl_perf_event_max_stack) &&
               tail && !((unsigned long)tail & 0x3))
                tail = user_backtrace(tail, entry);
 }
index b45c95d34b8323e74992e0a4a56e6da0e1257c60..4ef5373f9a7620dbbcdd4087a125b6f5a7f56c60 100644 (file)
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
                perf_bp_event(bp, regs);
 
                /* Do we need to handle the stepping? */
-               if (!bp->overflow_handler)
+               if (is_default_overflow_handler(bp))
                        step = 1;
 unlock:
                rcu_read_unlock();
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
                perf_bp_event(wp, regs);
 
                /* Do we need to handle the stepping? */
-               if (!wp->overflow_handler)
+               if (is_default_overflow_handler(wp))
                        step = 1;
 
 unlock:
index ff4665462a025d4ec2655ca30d49732a63194e53..32c3c6e70119f4e123498b85f1bc28398e333b13 100644 (file)
@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
                tail = (struct frame_tail __user *)regs->regs[29];
 
-               while (entry->nr < PERF_MAX_STACK_DEPTH &&
+               while (entry->nr < sysctl_perf_event_max_stack &&
                       tail && !((unsigned long)tail & 0xf))
                        tail = user_backtrace(tail, entry);
        } else {
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
                tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
 
-               while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+               while ((entry->nr < sysctl_perf_event_max_stack) &&
                        tail && !((unsigned long)tail & 0x3))
                        tail = compat_user_backtrace(tail, entry);
 #endif
index 315633461a94537c51c96b7ba92420b9715fd523..252abc12a5a31f6221b106b060ffd6926a8a6b5b 100644 (file)
@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        --frame;
 
-       while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+       while ((entry->nr < sysctl_perf_event_max_stack) && frame)
                frame = user_backtrace(frame, entry);
 }
 
index c1cf9c6c3f7705b9c50281d196633d91c8e788e5..5021c546ad07d3e28b7d0ac1969448c32b9a93e2 100644 (file)
@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
                addr = *sp++;
                if (__kernel_text_address(addr)) {
                        perf_callchain_store(entry, addr);
-                       if (entry->nr >= PERF_MAX_STACK_DEPTH)
+                       if (entry->nr >= sysctl_perf_event_max_stack)
                                break;
                }
        }
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
        }
        do {
                perf_callchain_store(entry, pc);
-               if (entry->nr >= PERF_MAX_STACK_DEPTH)
+               if (entry->nr >= sysctl_perf_event_max_stack)
                        break;
                pc = unwind_stack(current, &sp, pc, &ra);
        } while (pc);
index e04a6752b39991bbdf5ba389aef524182511fa9c..22d9015c1acc80dea12e78c1ac53f8fcd3b19f0a 100644 (file)
@@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
        sp = regs->gpr[1];
        perf_callchain_store(entry, next_ip);
 
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+       while (entry->nr < sysctl_perf_event_max_stack) {
                fp = (unsigned long __user *) sp;
                if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
                        return;
@@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
        sp = regs->gpr[1];
        perf_callchain_store(entry, next_ip);
 
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+       while (entry->nr < sysctl_perf_event_max_stack) {
                fp = (unsigned int __user *) (unsigned long) sp;
                if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
                        return;
index 6596f66ce1126fa487b7ce8eec19ef17dc4912e7..a4b8b5aed21c7b0fa83b30c7f3783e5c109ccf61 100644 (file)
@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
                        }
                }
 #endif
-       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+       } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static inline int
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
                pc = sf.callers_pc;
                ufp = (unsigned long)sf.fp + STACK_BIAS;
                perf_callchain_store(entry, pc);
-       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+       } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
                        ufp = (unsigned long)sf.fp;
                }
                perf_callchain_store(entry, pc);
-       } while (entry->nr < PERF_MAX_STACK_DEPTH);
+       } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 void
index 2dc18605831f6e88fd45c2863fc3d1fc7e6b6622..a494fa34713aa846ae89d307f83870f22bfea32c 100644 (file)
@@ -164,10 +164,6 @@ config INSTRUCTION_DECODER
        def_bool y
        depends on KPROBES || PERF_EVENTS || UPROBES
 
-config PERF_EVENTS_INTEL_UNCORE
-       def_bool y
-       depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
 config OUTPUT_FORMAT
        string
        default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR
        def_bool y
        depends on X86_MCE_INTEL
 
+source "arch/x86/events/Kconfig"
+
 config X86_LEGACY_VM86
        bool "Legacy VM86 support"
        default n
@@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE
        def_bool y
        depends on MICROCODE
 
-config PERF_EVENTS_AMD_POWER
-       depends on PERF_EVENTS && CPU_SUP_AMD
-       tristate "AMD Processor Power Reporting Mechanism"
-       ---help---
-         Provide power reporting mechanism support for AMD processors.
-         Currently, it leverages X86_FEATURE_ACC_POWER
-         (CPUID Fn8000_0007_EDX[12]) interface to calculate the
-         average power consumption on Family 15h processors.
-
 config X86_MSR
        tristate "/dev/cpu/*/msr - Model-specific register support"
        ---help---
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
new file mode 100644 (file)
index 0000000..98397db
--- /dev/null
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+       tristate "Intel uncore performance events"
+       depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+       default y
+       ---help---
+       Include support for Intel uncore performance events. These are
+       available on NehalemEX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+       tristate "Intel rapl performance events"
+       depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+       default y
+       ---help---
+       Include support for Intel rapl performance events for power
+       monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+       tristate "Intel cstate performance events"
+       depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+       default y
+       ---help---
+       Include support for Intel cstate performance events for power
+       monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+       depends on PERF_EVENTS && CPU_SUP_AMD
+       tristate "AMD Processor Power Reporting Mechanism"
+       ---help---
+         Provide power reporting mechanism support for AMD processors.
+         Currently, it leverages X86_FEATURE_ACC_POWER
+         (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+         average power consumption on Family 15h processors.
+
+endmenu
index f59618a3999058515027f413e7691fa2f7cdbb02..1d392c39fe560a782ae7fd71296ee0cc64f47fe5 100644 (file)
@@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
 endif
-obj-$(CONFIG_CPU_SUP_INTEL)            += intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += intel/cstate.o intel/ds.o intel/knc.o 
-obj-$(CONFIG_CPU_SUP_INTEL)            += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
+
+obj-$(CONFIG_CPU_SUP_INTEL)            += msr.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/
index 3db9569e658c86a794df30454904e9b06de4ea4c..98ac57381bf9bb1b2bea222dd1c6b265e9f40a01 100644 (file)
@@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = {
 };
 
 static struct pmu amd_nb_pmu = {
+       .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_nb",
        .event_init     = amd_uncore_event_init,
@@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = {
 };
 
 static struct pmu amd_l2_pmu = {
+       .task_ctx_nr    = perf_invalid_context,
        .attr_groups    = amd_uncore_attr_groups,
        .name           = "amd_l2",
        .event_init     = amd_uncore_event_init,
index 041e442a3e2806ed884584758cb8e62abd809e36..5e5e76a52f58cd60678ceb3401cbaa6a32378ede 100644 (file)
@@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what)
 {
        int i;
 
+       if (x86_pmu.lbr_pt_coexist)
+               return 0;
+
        if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
                mutex_lock(&pmc_reserve_mutex);
                for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@ fail_unlock:
 
 void x86_del_exclusive(unsigned int what)
 {
+       if (x86_pmu.lbr_pt_coexist)
+               return;
+
        atomic_dec(&x86_pmu.lbr_exclusive[what]);
        atomic_dec(&active_events);
 }
@@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
        fp = compat_ptr(ss_base + regs->bp);
        pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+       while (entry->nr < sysctl_perf_event_max_stack) {
                unsigned long bytes;
                frame.next_frame     = 0;
                frame.return_address = 0;
@@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
                return;
 
        pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+       while (entry->nr < sysctl_perf_event_max_stack) {
                unsigned long bytes;
                frame.next_frame             = NULL;
                frame.return_address = 0;
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
new file mode 100644 (file)
index 0000000..3660b2c
--- /dev/null
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL)            += core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)   += intel-rapl.o
+intel-rapl-objs                                := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
+intel-uncore-objs                      := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
+intel-cstate-objs                      := cstate.o
index b99dc9258c0f9ccc319a15e4d6ec170b0ea13e17..0a6e393a2e6298bb9240831714d5915ab26abc5a 100644 (file)
@@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
        memset(page_address(phys->page) + index, 0, phys->size - index);
 }
 
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-       if (buf->snapshot)
-               return false;
-
-       if (local_read(&buf->data_size) >= bts->handle.size ||
-           bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-               return true;
-
-       return false;
-}
-
 static void bts_update(struct bts_ctx *bts)
 {
        int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts)
        }
 }
 
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
 static void __bts_event_start(struct perf_event *event)
 {
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf = perf_get_aux(&bts->handle);
        u64 config = 0;
 
-       if (!buf || bts_buffer_is_full(buf, bts))
-               return;
-
-       event->hw.itrace_started = 1;
-       event->hw.state = 0;
-
        if (!buf->snapshot)
                config |= ARCH_PERFMON_EVENTSEL_INT;
        if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event)
        wmb();
 
        intel_pmu_enable_bts(config);
+
 }
 
 static void bts_event_start(struct perf_event *event, int flags)
 {
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf;
+
+       buf = perf_aux_output_begin(&bts->handle, event);
+       if (!buf)
+               goto fail_stop;
+
+       if (bts_buffer_reset(buf, &bts->handle))
+               goto fail_end_stop;
+
+       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+       event->hw.itrace_started = 1;
+       event->hw.state = 0;
 
        __bts_event_start(event);
 
        /* PMI handler: this counter is running and likely generating PMIs */
        ACCESS_ONCE(bts->started) = 1;
+
+       return;
+
+fail_end_stop:
+       perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+       event->hw.state = PERF_HES_STOPPED;
 }
 
 static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event)
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
 
        /* PMI handler: don't restart this counter */
        ACCESS_ONCE(bts->started) = 0;
 
        __bts_event_stop(event);
 
-       if (flags & PERF_EF_UPDATE)
+       if (flags & PERF_EF_UPDATE) {
                bts_update(bts);
+
+               if (buf) {
+                       if (buf->snapshot)
+                               bts->handle.head =
+                                       local_xchg(&buf->data_size,
+                                                  buf->nr_pages << PAGE_SHIFT);
+                       perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                                           !!local_xchg(&buf->lost, 0));
+               }
+
+               cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+               cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+               cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+               cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+       }
 }
 
 void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@ int intel_bts_interrupt(void)
 
 static void bts_event_del(struct perf_event *event, int mode)
 {
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
        bts_event_stop(event, PERF_EF_UPDATE);
-
-       if (buf) {
-               if (buf->snapshot)
-                       bts->handle.head =
-                               local_xchg(&buf->data_size,
-                                          buf->nr_pages << PAGE_SHIFT);
-               perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                                   !!local_xchg(&buf->lost, 0));
-       }
-
-       cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-       cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
 }
 
 static int bts_event_add(struct perf_event *event, int mode)
 {
-       struct bts_buffer *buf;
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       int ret = -EBUSY;
 
        event->hw.state = PERF_HES_STOPPED;
 
@@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode)
        if (bts->handle.event)
                return -EBUSY;
 
-       buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return -EINVAL;
-
-       ret = bts_buffer_reset(buf, &bts->handle);
-       if (ret) {
-               perf_aux_output_end(&bts->handle, 0, false);
-               return ret;
-       }
-
-       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
        if (mode & PERF_EF_START) {
                bts_event_start(event, 0);
-               if (hwc->state & PERF_HES_STOPPED) {
-                       bts_event_del(event, 0);
-                       return -EBUSY;
-               }
+               if (hwc->state & PERF_HES_STOPPED)
+                       return -EINVAL;
        }
 
        return 0;
index a6fd4dbcf820abf727b6118c0084a6877ec0340d..cd319400dc10605653b468a205591e60c5e9bde0 100644 (file)
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
  },
 };
 
+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+       EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD             BIT_ULL(0)
+#define GLM_DEMAND_RFO                 BIT_ULL(1)
+#define GLM_ANY_RESPONSE               BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS           BIT_ULL(33)
+#define GLM_DEMAND_READ                        GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE               GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH            (SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS                 GLM_ANY_RESPONSE
+#define GLM_SNP_ANY                    (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS                   (GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(L1D)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x0,
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+       },
+       [C(L1I)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x0380,       /* ICACHE.ACCESSES */
+                       [C(RESULT_MISS)]        = 0x0280,       /* ICACHE.MISSES */
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = -1,
+                       [C(RESULT_MISS)]        = -1,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x0,
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+       },
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
+                       [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
+                       [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
+                       [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
+               },
+       },
+       [C(DTLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x0,
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+       },
+       [C(ITLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x00c0,       /* INST_RETIRED.ANY_P */
+                       [C(RESULT_MISS)]        = 0x0481,       /* ITLB.MISS */
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = -1,
+                       [C(RESULT_MISS)]        = -1,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = -1,
+                       [C(RESULT_MISS)]        = -1,
+               },
+       },
+       [C(BPU)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = 0x00c4,       /* BR_INST_RETIRED.ALL_BRANCHES */
+                       [C(RESULT_MISS)]        = 0x00c5,       /* BR_MISP_RETIRED.ALL_BRANCHES */
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = -1,
+                       [C(RESULT_MISS)]        = -1,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = -1,
+                       [C(RESULT_MISS)]        = -1,
+               },
+       },
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = GLM_DEMAND_READ|
+                                                 GLM_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = GLM_DEMAND_READ|
+                                                 GLM_LLC_MISS,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = GLM_DEMAND_WRITE|
+                                                 GLM_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = GLM_DEMAND_WRITE|
+                                                 GLM_LLC_MISS,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = GLM_DEMAND_PREFETCH|
+                                                 GLM_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = GLM_DEMAND_PREFETCH|
+                                                 GLM_LLC_MISS,
+               },
+       },
+};
+
 #define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL       BIT_ULL(21)
@@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void)
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
                       sizeof(hw_cache_extra_regs));
 
-               intel_pmu_lbr_init_atom();
+               intel_pmu_lbr_init_slm();
 
                x86_pmu.event_constraints = intel_slm_event_constraints;
                x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void)
                pr_cont("Silvermont events, ");
                break;
 
+       case 92: /* 14nm Atom "Goldmont" */
+       case 95: /* 14nm Atom "Goldmont Denverton" */
+               memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_glm_extra_regs;
+               /*
+                * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+                * for precise cycles.
+                * :pp is identical to :ppp
+                */
+               x86_pmu.pebs_aliases = NULL;
+               x86_pmu.pebs_prec_dist = true;
+               x86_pmu.lbr_pt_coexist = true;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               pr_cont("Goldmont events, ");
+               break;
+
        case 37: /* 32nm Westmere    */
        case 44: /* 32nm Westmere-EP */
        case 47: /* 32nm Westmere-EX */
index 7946c4231169ff81ed2c22cd538cdf289e8930f2..9ba4e4136a1539ba791052b726c12343694ea85f 100644 (file)
@@ -91,6 +91,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
                                struct kobj_attribute *attr,    \
@@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf);
 
+/* Model -> events mapping: the residency events one CPU model supports */
+struct cstate_model {
+       unsigned long           core_events;    /* BIT(PERF_CSTATE_CORE_*) mask */
+       unsigned long           pkg_events;     /* BIT(PERF_CSTATE_PKG_*) mask */
+       unsigned long           quirks;         /* quirk flags, e.g. SLM_PKG_C6_USE_C7_MSR */
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR  (1UL << 0)
+
 struct perf_cstate_msr {
        u64     msr;
        struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
 };
 
 
 /* cstate_core PMU */
-
 static struct pmu cstate_core_pmu;
 static bool has_cstate_core;
 
-enum perf_cstate_core_id {
-       /*
-        * cstate_core events
-        */
+enum perf_cstate_core_events {
        PERF_CSTATE_CORE_C1_RES = 0,
        PERF_CSTATE_CORE_C3_RES,
        PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@ enum perf_cstate_core_id {
        PERF_CSTATE_CORE_EVENT_MAX,
 };
 
-bool test_core(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C1_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
 
 static struct perf_cstate_msr core_msr[] = {
-       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
-       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
-       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
-       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
+       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1 },
+       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3 },
+       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6 },
+       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7 },
 };
 
 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = {
        NULL,
 };
 
-/* cstate_core PMU end */
-
-
 /* cstate_pkg PMU */
-
 static struct pmu cstate_pkg_pmu;
 static bool has_cstate_pkg;
 
-enum perf_cstate_pkg_id {
-       /*
-        * cstate_pkg events
-        */
+enum perf_cstate_pkg_events {
        PERF_CSTATE_PKG_C2_RES = 0,
        PERF_CSTATE_PKG_C3_RES,
        PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@ enum perf_cstate_pkg_id {
        PERF_CSTATE_PKG_EVENT_MAX,
 };
 
-bool test_pkg(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 69: /* 22nm Haswell ULT */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES ||
-                   idx == PERF_CSTATE_PKG_C8_RES ||
-                   idx == PERF_CSTATE_PKG_C9_RES ||
-                   idx == PERF_CSTATE_PKG_C10_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
 PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
 
 static struct perf_cstate_msr pkg_msr[] = {
-       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
-       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
-       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
-       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
-       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
-       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
-       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
+       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2 },
+       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3 },
+       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6 },
+       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7 },
+       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8 },
+       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9 },
+       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10 },
 };
 
 static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
        NULL,
 };
 
-/* cstate_pkg PMU end*/
-
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
                                       struct device_attribute *attr,
                                       char *buf)
@@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 static int cstate_pmu_event_init(struct perf_event *event)
 {
        u64 cfg = event->attr.config;
-       int ret = 0;
+       int cpu;
 
        if (event->attr.type != event->pmu->type)
                return -ENOENT;
@@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (event->cpu < 0)
+               return -EINVAL;
+
        if (event->pmu == &cstate_core_pmu) {
                if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
                        return -EINVAL;
                if (!core_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = core_msr[cfg].msr;
+               cpu = cpumask_any_and(&cstate_core_cpu_mask,
+                                     topology_sibling_cpumask(event->cpu));
        } else if (event->pmu == &cstate_pkg_pmu) {
                if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
                        return -EINVAL;
                if (!pkg_msr[cfg].attr)
                        return -EINVAL;
                event->hw.event_base = pkg_msr[cfg].msr;
-       } else
+               cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                     topology_core_cpumask(event->cpu));
+       } else {
                return -ENOENT;
+       }
+
+       if (cpu >= nr_cpu_ids)
+               return -ENODEV;
 
-       /* must be done before validate_group */
+       event->cpu = cpu;
        event->hw.config = cfg;
        event->hw.idx = -1;
-
-       return ret;
+       return 0;
 }
 
 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
        return 0;
 }
 
+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
 static void cstate_cpu_exit(int cpu)
 {
-       int i, id, target;
+       unsigned int target;
 
-       /* cpu exit for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_core_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+       if (has_cstate_core &&
+           cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+               target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+               /* Migrate events if there is a valid target */
+               if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &cstate_core_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-               if (target >= 0)
                        perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+               }
        }
 
-       /* cpu exit for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_physical_package_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+       if (has_cstate_pkg &&
+           cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+               target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+               /* Migrate events if there is a valid target */
+               if (target < nr_cpu_ids) {
                        cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-               if (target >= 0)
                        perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+               }
        }
 }
 
 static void cstate_cpu_init(int cpu)
 {
-       int i, id;
+       unsigned int target;
 
-       /* cpu init for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               for_each_cpu(i, &cstate_core_cpu_mask) {
-                       if (id == topology_core_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-       }
+       /*
+        * If this is the first online thread of that core, set it in
+        * the core cpu mask as the designated reader.
+        */
+       target = cpumask_any_and(&cstate_core_cpu_mask,
+                                topology_sibling_cpumask(cpu));
 
-       /* cpu init for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               for_each_cpu(i, &cstate_pkg_cpu_mask) {
-                       if (id == topology_physical_package_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-       }
+       if (has_cstate_core && target >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+       /*
+        * If this is the first online thread of that package, set it
+        * in the package cpu mask as the designated reader.
+        */
+       target = cpumask_any_and(&cstate_pkg_cpu_mask,
+                                topology_core_cpumask(cpu));
+       if (has_cstate_pkg && target >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }
 
 static int cstate_cpu_notifier(struct notifier_block *self,
-                                 unsigned long action, void *hcpu)
+                              unsigned long action, void *hcpu)
 {
        unsigned int cpu = (long)hcpu;
 
        switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               break;
        case CPU_STARTING:
                cstate_cpu_init(cpu);
                break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               break;
        case CPU_DOWN_PREPARE:
                cstate_cpu_exit(cpu);
                break;
        default:
                break;
        }
-
        return NOTIFY_OK;
 }
 
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-                            struct attribute   **events_attrs,
-                            int max_event_nr)
-{
-       int i, j = 0;
-       u64 val;
-
-       /* Probe the cstate events. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining events in the sysfs attrs. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-       /* SLM has different MSR for PKG C6 */
-       switch (boot_cpu_data.x86_model) {
-       case 55:
-       case 76:
-       case 77:
-               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-       }
-
-       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-               has_cstate_core = true;
-
-       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-               has_cstate_pkg = true;
-
-       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-       int cpu;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
-
-       __perf_cpu_notifier(cstate_cpu_notifier);
-
-       cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+       .notifier_call  = cstate_cpu_notifier,
+       .priority       = CPU_PRI_PERF + 1,
+};
 
 static struct pmu cstate_core_pmu = {
        .attr_groups    = core_attr_groups,
        .name           = "cstate_core",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
+       .add            = cstate_pmu_event_add,
+       .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
@@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = {
        .name           = "cstate_pkg",
        .task_ctx_nr    = perf_invalid_context,
        .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
+       .add            = cstate_pmu_event_add,
+       .del            = cstate_pmu_event_del,
        .start          = cstate_pmu_event_start,
        .stop           = cstate_pmu_event_stop,
        .read           = cstate_pmu_event_update,
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES) |
+                                 BIT(PERF_CSTATE_CORE_C7_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C3_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES) |
+                                 BIT(PERF_CSTATE_CORE_C7_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES) |
+                                 BIT(PERF_CSTATE_PKG_C7_RES) |
+                                 BIT(PERF_CSTATE_PKG_C8_RES) |
+                                 BIT(PERF_CSTATE_PKG_C9_RES) |
+                                 BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C1_RES) |
+                                 BIT(PERF_CSTATE_CORE_C6_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C6_RES),
+       .quirks                 = SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states)                               \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+       X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
+       X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
+       X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
+
+       X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
+       X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
+       X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
+
+       X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
+       X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
+
+       X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
+       X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
+
+       X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
+       X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
+       X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
+
+       X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
+
+       X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
+       X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
+       X86_CSTATES_MODEL(76, slm_cstates),    /* 14nm Atom Airmont         */
+
+       X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
+       X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
+       X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
+       X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
+
+       X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
+       X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
+       { },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the events set in evmsk: list those whose rdmsrl_safe() returns 0 in
+ * attrs and clear ->attr of the rest. Return false if no event is available.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+                                   struct perf_cstate_msr *msr,
+                                   struct attribute **attrs)
+{
+       bool found = false;
+       unsigned int bit;
+       u64 val;
+
+       for (bit = 0; bit < max; bit++) {
+               if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+                       *attrs++ = &msr[bit].attr->attr.attr;
+                       found = true;
+               } else {
+                       msr[bit].attr = NULL;   /* event_init rejects NULL attr */
+               }
+       }
+       *attrs = NULL;  /* terminate the attribute list */
+
+       return found;
+}
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+       /* SLM has different MSR for PKG C6 */
+       if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+       has_cstate_core = cstate_probe_msr(cm->core_events,
+                                          PERF_CSTATE_CORE_EVENT_MAX,
+                                          core_msr, core_events_attrs);
+
+       has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+                                         PERF_CSTATE_PKG_EVENT_MAX,
+                                         pkg_msr, pkg_events_attrs);
+
+       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+       if (has_cstate_core)
+               perf_pmu_unregister(&cstate_core_pmu);
+
+       if (has_cstate_pkg)
+               perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+       int cpu, err = 0;       /* stays 0 when no PMU needs registering */
+
+       cpu_notifier_register_begin();
+       for_each_online_cpu(cpu)
+               cstate_cpu_init(cpu);
 
        if (has_cstate_core) {
                err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_core_pmu.name, err);
+               if (err) {
+                       has_cstate_core = false;
+                       pr_info("Failed to register cstate core pmu\n");
+                       goto out;
+               }
        }
 
        if (has_cstate_pkg) {
                err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_pkg_pmu.name, err);
+               if (err) {
+                       has_cstate_pkg = false;
+                       pr_info("Failed to register cstate pkg pmu\n");
+                       cstate_cleanup();       /* undo the core PMU registration */
+                       goto out;
+               }
        }
+       __register_cpu_notifier(&cstate_cpu_nb);
+out:
+       cpu_notifier_register_done();
+       return err;
 }
 
 static int __init cstate_pmu_init(void)
 {
+       const struct x86_cpu_id *id;
        int err;
 
-       if (cpu_has_hypervisor)
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+               return -ENODEV;
+
+       id = x86_match_cpu(intel_cstates_match);
+       if (!id)
                return -ENODEV;
 
-       err = cstate_init();
+       err = cstate_probe((const struct cstate_model *) id->driver_data);
        if (err)
                return err;
 
-       cstate_cpumask_init();
-
-       cstate_pmus_register();
-
-       return 0;
+       return cstate_init();
 }
+module_init(cstate_pmu_init);
 
-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+       cpu_notifier_register_begin();
+       __unregister_cpu_notifier(&cstate_cpu_nb);
+       cstate_cleanup();
+       cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
index 8584b90d8e0bb08e8e9ddc3dd7b6f38dae5363c2..7ce9f3f669e63d2bd4bbf15db801cf8bc5fa7565 100644 (file)
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
        EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glm_pebs_event_constraints[] = {
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
        INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
index 1ca5d1e7d4f253429fc1c44968d8219dff4086cd..9e2b40cdb05f8c68061e43a14b04aa3aace3429c 100644 (file)
@@ -14,7 +14,8 @@ enum {
        LBR_FORMAT_EIP_FLAGS    = 0x03,
        LBR_FORMAT_EIP_FLAGS2   = 0x04,
        LBR_FORMAT_INFO         = 0x05,
-       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+       LBR_FORMAT_TIME         = 0x06,
+       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
 };
 
 static enum {
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                        abort = !!(info & LBR_INFO_ABORT);
                        cycles = (info & LBR_INFO_CYCLES);
                }
+
+               if (lbr_format == LBR_FORMAT_TIME) {
+                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
+                       pred = !mis;
+                       skip = 1;
+                       cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+                       to = (u64)((((s64)to) << 16) >> 16);
+               }
+
                if (lbr_flags & LBR_EIP_FLAGS) {
                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
                        pred = !mis;
@@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void)
        pr_cont("8-deep LBR, ");
 }
 
+/* Silvermont */
+void __init intel_pmu_lbr_init_slm(void)
+{
+       x86_pmu.lbr_nr     = 8;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("8-deep LBR, ");
+}
+
 /* Knights Landing */
 void intel_pmu_lbr_init_knl(void)
 {
index 09a77dbc73c93110a40d2afbf2dedc86e50ea44f..54fa238d84d558af116b92cac732568de0478f4f 100644 (file)
@@ -67,11 +67,13 @@ static struct pt_cap_desc {
        PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
        PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
        PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
+       PT_CAP(ip_filtering,            0, CR_EBX, BIT(2)),
        PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
        PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
        PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
        PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
        PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
+       PT_CAP(num_address_ranges,      1, CR_EAX, 0x3),
        PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
        PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
        PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = {
        .attrs  = pt_formats_attr,
 };
 
+/*
+ * sysfs show handler shared by the PT timing attributes; the attribute
+ * is told apart by the id stored in its perf_pmu_events_attr:
+ *   0 - max_nonturbo_ratio, printed as a single number
+ *   1 - tsc_art_ratio, printed as "numerator:denominator"
+ * Any other id yields -EINVAL.
+ */
+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+                   char *page)
+{
+       struct perf_pmu_events_attr *timing_attr;
+
+       timing_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+       if (timing_attr->id == 0)
+               return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+
+       if (timing_attr->id == 1)
+               return sprintf(page, "%u:%u\n",
+                              pt_pmu.tsc_art_num,
+                              pt_pmu.tsc_art_den);
+
+       return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+              pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+              pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+       &timing_attr_max_nonturbo_ratio.attr.attr,
+       &timing_attr_tsc_art_ratio.attr.attr,
+       NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+       .attrs  = pt_timing_attr,
+};
+
 static const struct attribute_group *pt_attr_groups[] = {
        &pt_cap_group,
        &pt_format_group,
+       &pt_timing_group,
        NULL,
 };
 
@@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void)
        int ret;
        long i;
 
+       rdmsrl(MSR_PLATFORM_INFO, reg);
+       pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+       /*
+        * if available, read in TSC to core crystal clock ratio,
+        * otherwise, zero for numerator stands for "not enumerated"
+        * as per SDM
+        */
+       if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+               u32 eax, ebx, ecx, edx;
+
+               cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+               pt_pmu.tsc_art_num = ebx;
+               pt_pmu.tsc_art_den = eax;
+       }
+
        if (boot_cpu_has(X86_FEATURE_VMX)) {
                /*
                 * Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+       unsigned long   msr_a;
+       unsigned long   msr_b;
+       unsigned int    reg_off;
+} pt_address_ranges[] = {
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR0_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR0_B,
+               .reg_off = RTIT_CTL_ADDR0_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR1_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR1_B,
+               .reg_off = RTIT_CTL_ADDR1_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR2_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR2_B,
+               .reg_off = RTIT_CTL_ADDR2_OFFSET,
+       },
+       {
+               .msr_a   = MSR_IA32_RTIT_ADDR3_A,
+               .msr_b   = MSR_IA32_RTIT_ADDR3_B,
+               .reg_off = RTIT_CTL_ADDR3_OFFSET,
+       }
+};
+
+/**
+ * pt_config_filters() - program the address range MSRs for an event
+ * @event:     event whose hw.addr_filters holds the ranges to program.
+ *
+ * Calls perf_event_addr_filters_sync() for @event and then, for each of
+ * its filters, writes the range start/end MSRs whenever the value differs
+ * from the one cached in this cpu's pt context.
+ *
+ * Return: the per-range config fields, positioned for RTIT_CTL, to be
+ * OR-ed into the control register value; 0 if @event has no filter
+ * context.
+ */
+static u64 pt_config_filters(struct perf_event *event)
+{
+       struct pt_filters *filters = event->hw.addr_filters;
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       unsigned int range = 0;
+       u64 rtit_ctl = 0;
+
+       if (!filters)
+               return 0;
+
+       perf_event_addr_filters_sync(event);
+
+       for (range = 0; range < filters->nr_filters; range++) {
+               struct pt_filter *filter = &filters->filter[range];
+
+               /*
+                * Note, if the range has zero start/end addresses due
+                * to its dynamic object not being loaded yet, we just
+                * go ahead and program zeroed range, which will simply
+                * produce no data. Note^2: if executable code at 0x0
+                * is a concern, we can set up an "invalid" configuration
+                * such as msr_b < msr_a.
+                */
+
+               /* avoid redundant msr writes */
+               if (pt->filters.filter[range].msr_a != filter->msr_a) {
+                       wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+                       pt->filters.filter[range].msr_a = filter->msr_a;
+               }
+
+               if (pt->filters.filter[range].msr_b != filter->msr_b) {
+                       wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+                       pt->filters.filter[range].msr_b = filter->msr_b;
+               }
+
+               /*
+                * reg_off is 32..44, while ->config is an unsigned long:
+                * widen to u64 before shifting so the config bits are not
+                * lost (and the shift is not undefined) on 32-bit kernels.
+                */
+               rtit_ctl |= (u64)filter->config <<
+                           pt_address_ranges[range].reg_off;
+       }
+
+       return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
        u64 reg;
@@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event)
                wrmsrl(MSR_IA32_RTIT_STATUS, 0);
        }
 
-       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+       reg = pt_config_filters(event);
+       reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
        if (!event->attr.exclude_kernel)
                reg |= RTIT_CTL_OS;
@@ -919,24 +1045,80 @@ static void pt_buffer_free_aux(void *data)
        kfree(buf);
 }
 
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf:       PT buffer.
- * @pt:                Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+static int pt_addr_filters_init(struct perf_event *event)
 {
-       if (buf->snapshot)
-               return false;
+       struct pt_filters *filters;
+       int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+       if (!pt_cap_get(PT_CAP_num_address_ranges))
+               return 0;
+
+       filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+       if (!filters)
+               return -ENOMEM;
+
+       if (event->parent)
+               memcpy(filters, event->parent->hw.addr_filters,
+                      sizeof(*filters));
+
+       event->hw.addr_filters = filters;
+
+       return 0;
+}
+
+/*
+ * Release the event's address filter context and clear the pointer so it
+ * cannot be used or freed again. The pointer may legitimately be NULL
+ * (nothing is allocated when no address ranges are supported), which
+ * kfree() is expected to tolerate.
+ */
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+       kfree(event->hw.addr_filters);
+       event->hw.addr_filters = NULL;
+}
+
+/*
+ * Check a list of perf address filters against what PT can program.
+ *
+ * Returns 0 if every filter is acceptable, otherwise the error of the
+ * first offending filter (checks are applied in this order per filter):
+ *   -EOPNOTSUPP  the filter is not a range
+ *   -EINVAL      the filter has no inode and its offset is rejected by
+ *                kernel_ip()
+ *   -EOPNOTSUPP  more filters than PT_CAP_num_address_ranges
+ */
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+       struct perf_addr_filter *filter;
+       int range = 0;
+
+       list_for_each_entry(filter, filters, entry) {
+               /* PT doesn't support single address triggers */
+               if (!filter->range)
+                       return -EOPNOTSUPP;
+
+               /* a filter without an inode must carry a kernel address */
+               if (!filter->inode && !kernel_ip(filter->offset))
+                       return -EINVAL;
+
+               /* count this filter; fail once the hardware limit is passed */
+               if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+                       return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+       struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+       unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+       struct pt_filters *filters = event->hw.addr_filters;
+       struct perf_addr_filter *filter;
+       int range = 0;
+
+       if (!filters)
+               return;
 
-       if (local_read(&buf->data_size) >= pt->handle.size)
-               return true;
+       list_for_each_entry(filter, &head->list, entry) {
+               if (filter->inode && !offs[range]) {
+                       msr_a = msr_b = 0;
+               } else {
+                       /* apply the offset */
+                       msr_a = filter->offset + offs[range];
+                       msr_b = filter->size + msr_a;
+               }
+
+               filters->filter[range].msr_a  = msr_a;
+               filters->filter[range].msr_b  = msr_b;
+               filters->filter[range].config = filter->filter ? 1 : 2;
+               range++;
+       }
 
-       return false;
+       filters->nr_filters = range;
 }
 
 /**
@@ -953,7 +1135,7 @@ void intel_pt_interrupt(void)
         * after PT has been disabled by pt_event_stop(). Make sure we don't
         * do anything (particularly, re-enable) for this event here.
         */
-       if (!ACCESS_ONCE(pt->handle_nmi))
+       if (!READ_ONCE(pt->handle_nmi))
                return;
 
        /*
@@ -1038,23 +1220,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
 
 static void pt_event_start(struct perf_event *event, int mode)
 {
+       struct hw_perf_event *hwc = &event->hw;
        struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+       struct pt_buffer *buf;
 
        if (READ_ONCE(pt->vmx_on))
                return;
 
-       if (!buf || pt_buffer_is_full(buf, pt)) {
-               event->hw.state = PERF_HES_STOPPED;
-               return;
+       buf = perf_aux_output_begin(&pt->handle, event);
+       if (!buf)
+               goto fail_stop;
+
+       pt_buffer_reset_offsets(buf, pt->handle.head);
+       if (!buf->snapshot) {
+               if (pt_buffer_reset_markers(buf, &pt->handle))
+                       goto fail_end_stop;
        }
 
-       ACCESS_ONCE(pt->handle_nmi) = 1;
-       event->hw.state = 0;
+       WRITE_ONCE(pt->handle_nmi, 1);
+       hwc->state = 0;
 
        pt_config_buffer(buf->cur->table, buf->cur_idx,
                         buf->output_off);
        pt_config(event);
+
+       return;
+
+fail_end_stop:
+       perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+       hwc->state = PERF_HES_STOPPED;
 }
 
 static void pt_event_stop(struct perf_event *event, int mode)
@@ -1065,7 +1260,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
         * Protect against the PMI racing with disabling wrmsr,
         * see comment in intel_pt_interrupt().
         */
-       ACCESS_ONCE(pt->handle_nmi) = 0;
+       WRITE_ONCE(pt->handle_nmi, 0);
 
        pt_config_stop(event);
 
@@ -1088,19 +1283,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
                pt_handle_status(pt);
 
                pt_update_head(pt);
-       }
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf;
 
-       pt_event_stop(event, PERF_EF_UPDATE);
-
-       buf = perf_get_aux(&pt->handle);
-
-       if (buf) {
                if (buf->snapshot)
                        pt->handle.head =
                                local_xchg(&buf->data_size,
@@ -1110,9 +1293,13 @@ static void pt_event_del(struct perf_event *event, int mode)
        }
 }
 
+/*
+ * pmu::del callback: removing the event is just a stop with
+ * PERF_EF_UPDATE; @mode is not consulted.
+ */
+static void pt_event_del(struct perf_event *event, int mode)
+{
+       pt_event_stop(event, PERF_EF_UPDATE);
+}
+
 static int pt_event_add(struct perf_event *event, int mode)
 {
-       struct pt_buffer *buf;
        struct pt *pt = this_cpu_ptr(&pt_ctx);
        struct hw_perf_event *hwc = &event->hw;
        int ret = -EBUSY;
@@ -1120,34 +1307,18 @@ static int pt_event_add(struct perf_event *event, int mode)
        if (pt->handle.event)
                goto fail;
 
-       buf = perf_aux_output_begin(&pt->handle, event);
-       ret = -EINVAL;
-       if (!buf)
-               goto fail_stop;
-
-       pt_buffer_reset_offsets(buf, pt->handle.head);
-       if (!buf->snapshot) {
-               ret = pt_buffer_reset_markers(buf, &pt->handle);
-               if (ret)
-                       goto fail_end_stop;
-       }
-
        if (mode & PERF_EF_START) {
                pt_event_start(event, 0);
-               ret = -EBUSY;
+               ret = -EINVAL;
                if (hwc->state == PERF_HES_STOPPED)
-                       goto fail_end_stop;
+                       goto fail;
        } else {
                hwc->state = PERF_HES_STOPPED;
        }
 
-       return 0;
-
-fail_end_stop:
-       perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
-       hwc->state = PERF_HES_STOPPED;
+       ret = 0;
 fail:
+
        return ret;
 }
 
@@ -1157,6 +1328,7 @@ static void pt_event_read(struct perf_event *event)
 
 static void pt_event_destroy(struct perf_event *event)
 {
+       pt_addr_filters_fini(event);
        x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1171,6 +1343,11 @@ static int pt_event_init(struct perf_event *event)
        if (x86_add_exclusive(x86_lbr_exclusive_pt))
                return -EBUSY;
 
+       if (pt_addr_filters_init(event)) {
+               x86_del_exclusive(x86_lbr_exclusive_pt);
+               return -ENOMEM;
+       }
+
        event->destroy = pt_event_destroy;
 
        return 0;
@@ -1190,7 +1367,7 @@ static __init int pt_init(void)
 
        BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
 
-       if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+       if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
                return -ENODEV;
 
        get_online_cpus();
@@ -1224,16 +1401,21 @@ static __init int pt_init(void)
                        PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
        pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-       pt_pmu.pmu.attr_groups  = pt_attr_groups;
-       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
-       pt_pmu.pmu.event_init   = pt_event_init;
-       pt_pmu.pmu.add          = pt_event_add;
-       pt_pmu.pmu.del          = pt_event_del;
-       pt_pmu.pmu.start        = pt_event_start;
-       pt_pmu.pmu.stop         = pt_event_stop;
-       pt_pmu.pmu.read         = pt_event_read;
-       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
-       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
+       pt_pmu.pmu.attr_groups           = pt_attr_groups;
+       pt_pmu.pmu.task_ctx_nr           = perf_sw_context;
+       pt_pmu.pmu.event_init            = pt_event_init;
+       pt_pmu.pmu.add                   = pt_event_add;
+       pt_pmu.pmu.del                   = pt_event_del;
+       pt_pmu.pmu.start                 = pt_event_start;
+       pt_pmu.pmu.stop                  = pt_event_stop;
+       pt_pmu.pmu.read                  = pt_event_read;
+       pt_pmu.pmu.setup_aux             = pt_buffer_setup_aux;
+       pt_pmu.pmu.free_aux              = pt_buffer_free_aux;
+       pt_pmu.pmu.addr_filters_sync     = pt_event_addr_filters_sync;
+       pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+       pt_pmu.pmu.nr_addr_filters       =
+               pt_cap_get(PT_CAP_num_address_ranges);
+
        ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
        return ret;
index 3abb5f5cccc87d0a00cd4103cdad0f2674ca8423..efffa4a09f687f9578a6d8b99a89b3d2595ded4c 100644 (file)
 #ifndef __INTEL_PT_H__
 #define __INTEL_PT_H__
 
+/*
+ * PT MSR bit definitions
+ */
+#define RTIT_CTL_TRACEEN               BIT(0)
+#define RTIT_CTL_CYCLEACC              BIT(1)
+#define RTIT_CTL_OS                    BIT(2)
+#define RTIT_CTL_USR                   BIT(3)
+#define RTIT_CTL_CR3EN                 BIT(7)
+#define RTIT_CTL_TOPA                  BIT(8)
+#define RTIT_CTL_MTC_EN                        BIT(9)
+#define RTIT_CTL_TSC_EN                        BIT(10)
+#define RTIT_CTL_DISRETC               BIT(11)
+#define RTIT_CTL_BRANCH_EN             BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET      14
+#define RTIT_CTL_MTC_RANGE             (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET     19
+#define RTIT_CTL_CYC_THRESH            (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET       24
+#define RTIT_CTL_PSB_FREQ                      (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
+#define RTIT_CTL_ADDR0_OFFSET          32
+#define RTIT_CTL_ADDR0                 (0x0full << RTIT_CTL_ADDR0_OFFSET)
+#define RTIT_CTL_ADDR1_OFFSET          36
+#define RTIT_CTL_ADDR1                 (0x0full << RTIT_CTL_ADDR1_OFFSET)
+#define RTIT_CTL_ADDR2_OFFSET          40
+#define RTIT_CTL_ADDR2                 (0x0full << RTIT_CTL_ADDR2_OFFSET)
+#define RTIT_CTL_ADDR3_OFFSET          44
+#define RTIT_CTL_ADDR3                 (0x0full << RTIT_CTL_ADDR3_OFFSET)
+#define RTIT_STATUS_FILTEREN           BIT(0)
+#define RTIT_STATUS_CONTEXTEN          BIT(1)
+#define RTIT_STATUS_TRIGGEREN          BIT(2)
+#define RTIT_STATUS_BUFFOVF            BIT(3)
+#define RTIT_STATUS_ERROR              BIT(4)
+#define RTIT_STATUS_STOPPED            BIT(5)
+
 /*
  * Single-entry ToPA: when this close to region boundary, switch
  * buffers to avoid losing data.
@@ -48,15 +82,20 @@ struct topa_entry {
 #define PT_CPUID_LEAVES                2
 #define PT_CPUID_REGS_NUM      4 /* number of regsters (eax, ebx, ecx, edx) */
 
+/* TSC to Core Crystal Clock Ratio */
+#define CPUID_TSC_LEAF         0x15
+
 enum pt_capabilities {
        PT_CAP_max_subleaf = 0,
        PT_CAP_cr3_filtering,
        PT_CAP_psb_cyc,
+       PT_CAP_ip_filtering,
        PT_CAP_mtc,
        PT_CAP_topa_output,
        PT_CAP_topa_multiple_entries,
        PT_CAP_single_range_output,
        PT_CAP_payloads_lip,
+       PT_CAP_num_address_ranges,
        PT_CAP_mtc_periods,
        PT_CAP_cycle_thresholds,
        PT_CAP_psb_periods,
@@ -66,6 +105,9 @@ struct pt_pmu {
        struct pmu              pmu;
        u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
        bool                    vmx;
+       unsigned long           max_nonturbo_ratio;
+       unsigned int            tsc_art_num;
+       unsigned int            tsc_art_den;
 };
 
 /**
@@ -104,14 +146,40 @@ struct pt_buffer {
        struct topa_entry       *topa_index[0];
 };
 
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a:     range start, goes to RTIT_ADDRn_A
+ * @msr_b:     range end, goes to RTIT_ADDRn_B
+ * @config:    4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+       unsigned long   msr_a;
+       unsigned long   msr_b;
+       unsigned long   config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter:    filters defined for this context
+ * @nr_filters:        number of defined filters in the @filter array
+ */
+struct pt_filters {
+       struct pt_filter        filter[PT_FILTERS_NUM];
+       unsigned int            nr_filters;
+};
+
 /**
  * struct pt - per-cpu pt context
  * @handle:    perf output handle
+ * @filters:           last configured filters
  * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
  * @vmx_on:    1 if VMX is ON on this cpu
  */
 struct pt {
        struct perf_output_handle handle;
+       struct pt_filters       filters;
        int                     handle_nmi;
        int                     vmx_on;
 };
index 1705c9d75e4477e1c246d608fa1e9ebb36f6f50b..99c4bab123cdae71b14ab92241d81dbf15616fe1 100644 (file)
  *       event: rapl_energy_dram
  *    perf code: 0x3
  *
- * dram counter: consumption of the builtin-gpu domain (client only)
+ * gpu counter: consumption of the builtin-gpu domain (client only)
  *       event: rapl_energy_gpu
  *    perf code: 0x4
  *
+ *  psys counter: consumption of the builtin-psys domain (client only)
+ *       event: rapl_energy_psys
+ *    perf code: 0x5
+ *
  * We manage those counters as free running (read-only). They may be
  * use simultaneously by other tools, such as turbostat.
  *
@@ -53,6 +57,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 /*
  * RAPL energy status counters
  */
 #define INTEL_RAPL_RAM         0x3     /* pseudo-encoding */
 #define RAPL_IDX_PP1_NRG_STAT  3       /* gpu */
 #define INTEL_RAPL_PP1         0x4     /* pseudo-encoding */
+#define RAPL_IDX_PSYS_NRG_STAT 4       /* psys */
+#define INTEL_RAPL_PSYS                0x5     /* pseudo-encoding */
 
-#define NR_RAPL_DOMAINS         0x4
+#define NR_RAPL_DOMAINS         0x5
 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
        "pp0-core",
        "package",
        "dram",
        "pp1-gpu",
+       "psys",
 };
 
 /* Clients have PP0, PKG */
@@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
                         1<<RAPL_IDX_RAM_NRG_STAT|\
                         1<<RAPL_IDX_PP1_NRG_STAT)
 
+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+                         1<<RAPL_IDX_PKG_NRG_STAT|\
+                         1<<RAPL_IDX_RAM_NRG_STAT|\
+                         1<<RAPL_IDX_PP1_NRG_STAT|\
+                         1<<RAPL_IDX_PSYS_NRG_STAT)
+
 /* Knights Landing has PKG, RAM */
 #define RAPL_IDX_KNL   (1<<RAPL_IDX_PKG_NRG_STAT|\
                         1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
                bit = RAPL_IDX_PP1_NRG_STAT;
                msr = MSR_PP1_ENERGY_STATUS;
                break;
+       case INTEL_RAPL_PSYS:
+               bit = RAPL_IDX_PSYS_NRG_STAT;
+               msr = MSR_PLATFORM_ENERGY_STATUS;
+               break;
        default:
                return -EINVAL;
        }
@@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
 RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
 RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
 RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
 
 RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
 
 /*
  * we compute in 0.23 nJ increments regardless of MSR
@@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890
 RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
 
 static struct attribute *rapl_events_srv_attr[] = {
        EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = {
        NULL,
 };
 
+static struct attribute *rapl_events_skl_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_gpu),
+       EVENT_PTR(rapl_ram),
+       EVENT_PTR(rapl_psys),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_gpu_unit),
+       EVENT_PTR(rapl_ram_unit),
+       EVENT_PTR(rapl_psys_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_gpu_scale),
+       EVENT_PTR(rapl_ram_scale),
+       EVENT_PTR(rapl_psys_scale),
+       NULL,
+};
+
 static struct attribute *rapl_events_knl_attr[] = {
        EVENT_PTR(rapl_pkg),
        EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self,
        return NOTIFY_OK;
 }
 
+static struct notifier_block rapl_cpu_nb = {
+       .notifier_call  = rapl_cpu_notifier,
+       .priority       = CPU_PRI_PERF + 1,
+};
+
 static int rapl_check_hw_unit(bool apply_quirk)
 {
        u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void)
        return 0;
 }
 
-static void __init cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(void)
 {
        int i;
 
@@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void)
        return 0;
 }
 
+#define X86_RAPL_MODEL_MATCH(model, init)      \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+/*
+ * Per-CPU-model RAPL setup, hung off x86_cpu_id::driver_data and consumed
+ * by rapl_pmu_init().
+ */
+struct intel_rapl_init_fun {
+       /* passed to rapl_check_hw_unit() */
+       bool apply_quirk;
+       /* copied into rapl_cntr_mask; built from 1<<RAPL_IDX_* bits */
+       int cntr_mask;
+       /* NULL-terminated event attributes for rapl_pmu_events_group */
+       struct attribute **attrs;
+};
+
+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
+       .apply_quirk = false,
+       .cntr_mask = RAPL_IDX_CLN,
+       .attrs = rapl_events_cln_attr,
+};
+
+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
+       .apply_quirk = true,
+       .cntr_mask = RAPL_IDX_SRV,
+       .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
+       .apply_quirk = false,
+       .cntr_mask = RAPL_IDX_HSW,
+       .attrs = rapl_events_hsw_attr,
+};
+
+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
+       .apply_quirk = false,
+       .cntr_mask = RAPL_IDX_SRV,
+       .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
+       .apply_quirk = true,
+       .cntr_mask = RAPL_IDX_KNL,
+       .attrs = rapl_events_knl_attr,
+};
+
+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
+       .apply_quirk = false,
+       .cntr_mask = RAPL_IDX_SKL_CLN,
+       .attrs = rapl_events_skl_attr,
+};
+
 static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
-       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
-       [1] = {},
+       X86_RAPL_MODEL_MATCH(42, snb_rapl_init),        /* Sandy Bridge */
+       X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),      /* Sandy Bridge-EP */
+
+       X86_RAPL_MODEL_MATCH(58, snb_rapl_init),        /* Ivy Bridge */
+       X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),      /* IvyTown */
+
+       X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),        /* Haswell */
+       X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),        /* Haswell-Server */
+       X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),        /* Haswell-Celeron */
+       X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),        /* Haswell GT3e */
+
+       X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),        /* Broadwell */
+       X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),        /* Broadwell-H */
+       X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),        /* Broadwell-Server */
+       X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),        /* Broadwell Xeon D */
+
+       X86_RAPL_MODEL_MATCH(87, knl_rapl_init),        /* Knights Landing */
+
+       X86_RAPL_MODEL_MATCH(78, skl_rapl_init),        /* Skylake */
+       X86_RAPL_MODEL_MATCH(94, skl_rapl_init),        /* Skylake H/S */
+       {},
 };
 
+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+
 static int __init rapl_pmu_init(void)
 {
-       bool apply_quirk = false;
+       const struct x86_cpu_id *id;
+       struct intel_rapl_init_fun *rapl_init;
+       bool apply_quirk;
        int ret;
 
-       if (!x86_match_cpu(rapl_cpu_match))
+       id = x86_match_cpu(rapl_cpu_match);
+       if (!id)
                return -ENODEV;
 
-       switch (boot_cpu_data.x86_model) {
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-               rapl_cntr_mask = RAPL_IDX_CLN;
-               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
-               break;
-       case 63: /* Haswell-Server */
-       case 79: /* Broadwell-Server */
-               apply_quirk = true;
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell-Celeron */
-       case 70: /* Haswell GT3e */
-       case 61: /* Broadwell */
-       case 71: /* Broadwell-H */
-               rapl_cntr_mask = RAPL_IDX_HSW;
-               rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
-               break;
-       case 45: /* Sandy Bridge-EP */
-       case 62: /* IvyTown */
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 87: /* Knights Landing */
-               apply_quirk = true;
-               rapl_cntr_mask = RAPL_IDX_KNL;
-               rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-               break;
-       default:
-               return -ENODEV;
-       }
+       rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
+       apply_quirk = rapl_init->apply_quirk;
+       rapl_cntr_mask = rapl_init->cntr_mask;
+       rapl_pmu_events_group.attrs = rapl_init->attrs;
 
        ret = rapl_check_hw_unit(apply_quirk);
        if (ret)
@@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void)
        if (ret)
                goto out;
 
-       __perf_cpu_notifier(rapl_cpu_notifier);
+       __register_cpu_notifier(&rapl_cpu_nb);
        cpu_notifier_register_done();
        rapl_advertise();
        return 0;
@@ -767,4 +856,14 @@ out:
        cpu_notifier_register_done();
        return ret;
 }
-device_initcall(rapl_pmu_init);
+module_init(rapl_pmu_init);
+
+/*
+ * Module exit: with CPU notifier registration held off, drop the RAPL CPU
+ * notifier, unregister the PMU and then call cleanup_rapl_pmus().
+ */
+static void __exit intel_rapl_exit(void)
+{
+       cpu_notifier_register_begin();
+       __unregister_cpu_notifier(&rapl_cpu_nb);
+       perf_pmu_unregister(&rapl_pmus->pmu);
+       cleanup_rapl_pmus();
+       cpu_notifier_register_done();
+}
+module_exit(intel_rapl_exit);
index 7012d18bb293073e7cf0021cab79c50e3e42289c..17734a6ef474c8e9090181ec4ffbcee4f11e538f 100644 (file)
@@ -1,3 +1,4 @@
+#include <asm/cpu_device_id.h>
 #include "uncore.h"
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed =
 struct event_constraint uncore_constraint_empty =
        EVENT_CONSTRAINT(0, 0, 0);
 
+MODULE_LICENSE("GPL");
+
 static int uncore_pcibus_to_physid(struct pci_bus *bus)
 {
        struct pci2phy_map *map;
@@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
        pmu->registered = false;
 }
 
-static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
 {
        struct intel_uncore_pmu *pmu = type->pmus;
        struct intel_uncore_box *box;
@@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
        }
 }
 
-static void __init uncore_exit_boxes(void *dummy)
+static void uncore_exit_boxes(void *dummy)
 {
        struct intel_uncore_type **types;
 
@@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
        kfree(pmu->boxes);
 }
 
-static void __init uncore_type_exit(struct intel_uncore_type *type)
+static void uncore_type_exit(struct intel_uncore_type *type)
 {
        struct intel_uncore_pmu *pmu = type->pmus;
        int i;
@@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
        type->events_group = NULL;
 }
 
-static void __init uncore_types_exit(struct intel_uncore_type **types)
+static void uncore_types_exit(struct intel_uncore_type **types)
 {
        for (; *types; types++)
                uncore_type_exit(*types);
@@ -989,46 +992,6 @@ static int __init uncore_pci_init(void)
        size_t size;
        int ret;
 
-       switch (boot_cpu_data.x86_model) {
-       case 45: /* Sandy Bridge-EP */
-               ret = snbep_uncore_pci_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ret = ivbep_uncore_pci_init();
-               break;
-       case 63: /* Haswell-EP */
-               ret = hswep_uncore_pci_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               ret = bdx_uncore_pci_init();
-               break;
-       case 42: /* Sandy Bridge */
-               ret = snb_uncore_pci_init();
-               break;
-       case 58: /* Ivy Bridge */
-               ret = ivb_uncore_pci_init();
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell Celeron */
-               ret = hsw_uncore_pci_init();
-               break;
-       case 61: /* Broadwell */
-               ret = bdw_uncore_pci_init();
-               break;
-       case 87: /* Knights Landing */
-               ret = knl_uncore_pci_init();
-               break;
-       case 94: /* SkyLake */
-               ret = skl_uncore_pci_init();
-               break;
-       default:
-               return -ENODEV;
-       }
-
-       if (ret)
-               return ret;
-
        size = max_packages * sizeof(struct pci_extra_dev);
        uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
        if (!uncore_extra_pci_dev) {
@@ -1060,7 +1023,7 @@ err:
        return ret;
 }
 
-static void __init uncore_pci_exit(void)
+static void uncore_pci_exit(void)
 {
        if (pcidrv_registered) {
                pcidrv_registered = false;
@@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void)
 {
        int ret;
 
-       switch (boot_cpu_data.x86_model) {
-       case 26: /* Nehalem */
-       case 30:
-       case 37: /* Westmere */
-       case 44:
-               nhm_uncore_cpu_init();
-               break;
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-       case 60: /* Haswell */
-       case 69: /* Haswell */
-       case 70: /* Haswell */
-       case 61: /* Broadwell */
-       case 71: /* Broadwell */
-               snb_uncore_cpu_init();
-               break;
-       case 45: /* Sandy Bridge-EP */
-               snbep_uncore_cpu_init();
-               break;
-       case 46: /* Nehalem-EX */
-       case 47: /* Westmere-EX aka. Xeon E7 */
-               nhmex_uncore_cpu_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ivbep_uncore_cpu_init();
-               break;
-       case 63: /* Haswell-EP */
-               hswep_uncore_cpu_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               bdx_uncore_cpu_init();
-               break;
-       case 87: /* Knights Landing */
-               knl_uncore_cpu_init();
-               break;
-       default:
-               return -ENODEV;
-       }
-
        ret = uncore_types_init(uncore_msr_uncores, true);
        if (ret)
                goto err;
@@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr)
        return 0;
 }
 
+/*
+ * Build one struct x86_cpu_id initializer for an Intel CPU with the given
+ * model number; the address of @init is stored as an unsigned long in the
+ * last field (read back as id->driver_data by intel_uncore_init()).
+ * The literal 6 is presumably the CPU family -- confirm against the
+ * struct x86_cpu_id field order.
+ */
+#define X86_UNCORE_MODEL_MATCH(model, init)    \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+/*
+ * Per-model uncore setup hooks, reached through intel_uncore_match.
+ * Either hook may be NULL; intel_uncore_init() tests each pointer before
+ * calling it and skips the matching generic init when the hook is absent.
+ */
+struct intel_uncore_init_fun {
+       void    (*cpu_init)(void);      /* called just before uncore_cpu_init() */
+       int     (*pci_init)(void);      /* non-zero return skips uncore_pci_init() */
+};
+
+/*
+ * Hook sets referenced from intel_uncore_match, one per uncore flavour.
+ * A hook that is not named in an initializer stays NULL: nhm and nhmex
+ * provide only .cpu_init, skl provides only .pci_init.
+ */
+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
+       .cpu_init = nhm_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
+       .cpu_init = snb_uncore_cpu_init,
+       .pci_init = snb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
+       .cpu_init = snb_uncore_cpu_init,
+       .pci_init = ivb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
+       .cpu_init = snb_uncore_cpu_init,
+       .pci_init = hsw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
+       .cpu_init = snb_uncore_cpu_init,
+       .pci_init = bdw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
+       .cpu_init = snbep_uncore_cpu_init,
+       .pci_init = snbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
+       .cpu_init = nhmex_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
+       .cpu_init = ivbep_uncore_cpu_init,
+       .pci_init = ivbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
+       .cpu_init = hswep_uncore_cpu_init,
+       .pci_init = hswep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
+       .cpu_init = bdx_uncore_cpu_init,
+       .pci_init = bdx_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
+       .cpu_init = knl_uncore_cpu_init,
+       .pci_init = knl_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+       .pci_init = skl_uncore_pci_init,
+};
+
+/*
+ * CPU model -> hook set table that intel_uncore_init() passes to
+ * x86_match_cpu(); a NULL result there makes the init return -ENODEV.
+ * The trailing empty entry presumably terminates the list -- confirm
+ * against x86_match_cpu().
+ */
+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+       X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),    /* Nehalem */
+       X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),    /* Westmere */
+       X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
+       X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),    /* Sandy Bridge */
+       X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),    /* Ivy Bridge */
+       X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),    /* Haswell */
+       X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),    /* Haswell Celeron */
+       X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),    /* Haswell */
+       X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),    /* Broadwell */
+       X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),    /* Broadwell */
+       X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),  /* Sandy Bridge-EP */
+       X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),  /* Nehalem-EX */
+       X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),  /* Westmere-EX aka. Xeon E7 */
+       X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),  /* Ivy Bridge-EP */
+       X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),  /* Haswell-EP */
+       X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),    /* BDX-EP */
+       X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),    /* BDX-DE */
+       X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),    /* Knights Landing */
+       X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),    /* SkyLake */
+       {},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+
 static int __init intel_uncore_init(void)
 {
-       int pret, cret, ret;
+       const struct x86_cpu_id *id;
+       struct intel_uncore_init_fun *uncore_init;
+       int pret = 0, cret = 0, ret;
 
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+       id = x86_match_cpu(intel_uncore_match);
+       if (!id)
                return -ENODEV;
 
        if (cpu_has_hypervisor)
@@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void)
 
        max_packages = topology_max_packages();
 
-       pret = uncore_pci_init();
-       cret = uncore_cpu_init();
+       uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+       if (uncore_init->pci_init) {
+               pret = uncore_init->pci_init();
+               if (!pret)
+                       pret = uncore_pci_init();
+       }
+
+       if (uncore_init->cpu_init) {
+               uncore_init->cpu_init();
+               cret = uncore_cpu_init();
+       }
 
        if (cret && pret)
                return -ENODEV;
@@ -1409,4 +1435,14 @@ err:
        cpu_notifier_register_done();
        return ret;
 }
-device_initcall(intel_uncore_init);
+module_init(intel_uncore_init);
+
+/*
+ * Module exit handler (registered with module_exit()).  Inside a
+ * cpu_notifier_register_begin()/cpu_notifier_register_done() section it
+ * unregisters uncore_cpu_nb, then tears down the MSR uncore types and
+ * finally the PCI side via uncore_pci_exit().
+ */
+static void __exit intel_uncore_exit(void)
+{
+       cpu_notifier_register_begin();
+       __unregister_cpu_notifier(&uncore_cpu_nb);
+       uncore_types_exit(uncore_msr_uncores);
+       uncore_pci_exit();
+       cpu_notifier_register_done();
+}
+module_exit(intel_uncore_exit);
index ec863b9a9f780c7507634353d64f9c2f76f1a0e1..7111400a1f9a0d07becfd8bb77d8588f35f05032 100644 (file)
@@ -6,6 +6,8 @@ enum perf_msr_id {
        PERF_MSR_MPERF                  = 2,
        PERF_MSR_PPERF                  = 3,
        PERF_MSR_SMI                    = 4,
+       PERF_MSR_PTSC                   = 5,
+       PERF_MSR_IRPERF                 = 6,
 
        PERF_MSR_EVENT_MAX,
 };
@@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx)
        return boot_cpu_has(X86_FEATURE_APERFMPERF);
 }
 
+/*
+ * perf_msr::test hook for the ptsc event: true when the boot CPU reports
+ * X86_FEATURE_PTSC.  @idx is not used.
+ */
+static bool test_ptsc(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_PTSC);
+}
+
+/*
+ * perf_msr::test hook for the irperf event: true when the boot CPU reports
+ * X86_FEATURE_IRPERF.  @idx is not used.
+ */
+static bool test_irperf(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_IRPERF);
+}
+
 static bool test_intel(int idx)
 {
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@ struct perf_msr {
        bool    (*test)(int idx);
 };
 
-PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+PMU_EVENT_ATTR_STRING(tsc,    evattr_tsc,    "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf,  evattr_aperf,  "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf,  evattr_mperf,  "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf,  evattr_pperf,  "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,    evattr_smi,    "event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc,   evattr_ptsc,   "event=0x05");
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
 
 static struct perf_msr msr[] = {
-       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
-       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
-       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
-       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
-       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
+       [PERF_MSR_TSC]    = { 0,                &evattr_tsc,    NULL,            },
+       [PERF_MSR_APERF]  = { MSR_IA32_APERF,   &evattr_aperf,  test_aperfmperf, },
+       [PERF_MSR_MPERF]  = { MSR_IA32_MPERF,   &evattr_mperf,  test_aperfmperf, },
+       [PERF_MSR_PPERF]  = { MSR_PPERF,        &evattr_pperf,  test_intel,      },
+       [PERF_MSR_SMI]    = { MSR_SMI_COUNT,    &evattr_smi,    test_intel,      },
+       [PERF_MSR_PTSC]   = { MSR_F15H_PTSC,    &evattr_ptsc,   test_ptsc,       },
+       [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF,  &evattr_irperf, test_irperf,     },
 };
 
 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
index ad4dc7ffffb5eb44eeb79c08e924697547f14ebd..8bd764df815d36287ca4b29effb969ede50735a6 100644 (file)
@@ -601,6 +601,7 @@ struct x86_pmu {
        u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
        const int       *lbr_sel_map;              /* lbr_select mappings */
        bool            lbr_double_abort;          /* duplicated lbr aborts */
+       bool            lbr_pt_coexist;            /* LBR may coexist with PT */
 
        /*
         * Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];
 
 extern struct event_constraint intel_slm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glm_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_slm(void);
+
 void intel_pmu_lbr_init_snb(void);
 
 void intel_pmu_lbr_init_hsw(void);
index 8f9afefd2dc5ab8159131ae1815f9ae122d7024b..8cd6a320f6585a3d9cb3dd810e13a6e0aa0731c9 100644 (file)
 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
 #define X86_FEATURE_BPEXT      (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC       ( 6*32+27) /* performance time-stamp counter */
 #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
 #define X86_FEATURE_MWAITX     ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
 
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO     (13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF     (13*32+1) /* Instructions Retired Count */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM     (14*32+ 0) /* Digital Thermal Sensor */
index 5b3c9a55f51cbeda86cb52dedac1f3f494042e55..5a73a9c62c392f676bc786b58a27e50e3476a123 100644 (file)
 #define MSR_PEBS_LD_LAT_THRESHOLD      0x000003f6
 
 #define MSR_IA32_RTIT_CTL              0x00000570
-#define RTIT_CTL_TRACEEN               BIT(0)
-#define RTIT_CTL_CYCLEACC              BIT(1)
-#define RTIT_CTL_OS                    BIT(2)
-#define RTIT_CTL_USR                   BIT(3)
-#define RTIT_CTL_CR3EN                 BIT(7)
-#define RTIT_CTL_TOPA                  BIT(8)
-#define RTIT_CTL_MTC_EN                        BIT(9)
-#define RTIT_CTL_TSC_EN                        BIT(10)
-#define RTIT_CTL_DISRETC               BIT(11)
-#define RTIT_CTL_BRANCH_EN             BIT(13)
-#define RTIT_CTL_MTC_RANGE_OFFSET      14
-#define RTIT_CTL_MTC_RANGE             (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
-#define RTIT_CTL_CYC_THRESH_OFFSET     19
-#define RTIT_CTL_CYC_THRESH            (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
-#define RTIT_CTL_PSB_FREQ_OFFSET       24
-#define RTIT_CTL_PSB_FREQ                      (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
 #define MSR_IA32_RTIT_STATUS           0x00000571
-#define RTIT_STATUS_CONTEXTEN          BIT(1)
-#define RTIT_STATUS_TRIGGEREN          BIT(2)
-#define RTIT_STATUS_ERROR              BIT(4)
-#define RTIT_STATUS_STOPPED            BIT(5)
+#define MSR_IA32_RTIT_STATUS           0x00000571
+#define MSR_IA32_RTIT_ADDR0_A          0x00000580
+#define MSR_IA32_RTIT_ADDR0_B          0x00000581
+#define MSR_IA32_RTIT_ADDR1_A          0x00000582
+#define MSR_IA32_RTIT_ADDR1_B          0x00000583
+#define MSR_IA32_RTIT_ADDR2_A          0x00000584
+#define MSR_IA32_RTIT_ADDR2_B          0x00000585
+#define MSR_IA32_RTIT_ADDR3_A          0x00000586
+#define MSR_IA32_RTIT_ADDR3_B          0x00000587
 #define MSR_IA32_RTIT_CR3_MATCH                0x00000572
 #define MSR_IA32_RTIT_OUTPUT_BASE      0x00000560
 #define MSR_IA32_RTIT_OUTPUT_MASK      0x00000561
 #define MSR_CONFIG_TDP_CONTROL         0x0000064B
 #define MSR_TURBO_ACTIVATION_RATIO     0x0000064C
 
+#define MSR_PLATFORM_ENERGY_STATUS     0x0000064D
+
 #define MSR_PKG_WEIGHTED_CORE_C0_RES   0x00000658
 #define MSR_PKG_ANY_CORE_C0_RES                0x00000659
 #define MSR_PKG_ANY_GFXE_C0_RES                0x0000065A
 #define MSR_AMD64_IBSOPDATA4           0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX    8 /* includes MSR_AMD64_IBSBRTARGET */
 
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF                        0xc00000e9
+
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL          0xc0010230
 #define MSR_F16H_L2I_PERF_CTR          0xc0010231
 #define MSR_F15H_PERF_CTR              0xc0010201
 #define MSR_F15H_NB_PERF_CTL           0xc0010240
 #define MSR_F15H_NB_PERF_CTR           0xc0010241
+#define MSR_F15H_PTSC                  0xc0010280
 #define MSR_F15H_IC_CFG                        0xc0011021
 
 /* Fam 10h MSRs */
index bf4db6eaec8fda2aae48f2fe7c1cddb8d157b37f..bd074151bfd641cf93afa6061e9452fd2aefbd1d 100644 (file)
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
        riprel_post_xol(auprobe, regs);
 }
 
-static struct uprobe_xol_ops default_xol_ops = {
+static const struct uprobe_xol_ops default_xol_ops = {
        .pre_xol  = default_pre_xol_op,
        .post_xol = default_post_xol_op,
        .abort    = default_abort_op,
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
                0, insn->immediate.nbytes);
 }
 
-static struct uprobe_xol_ops branch_xol_ops = {
+static const struct uprobe_xol_ops branch_xol_ops = {
        .emulate  = branch_emulate_op,
        .post_xol = branch_post_xol_op,
 };
index 54f01188c29c1a4048ac166d9383d7f3a26905c8..a6b00b3af42993e937181a8412c8949f8ae65983 100644 (file)
@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                           struct pt_regs *regs)
 {
-       xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+       xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
                                callchain_trace, NULL, entry);
 }
 
 void perf_callchain_user(struct perf_callchain_entry *entry,
                         struct pt_regs *regs)
 {
-       xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+       xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
                              callchain_trace, entry);
 }
 
index f70090897fdf19c9777c332401dd01de7b1efc52..f2d01d4d93645a0b029561ba2febf7fed56ec276 100644 (file)
@@ -847,6 +847,14 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
        if (!platform_get_irq(cpu_pmu->plat_device, 0))
                cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
+       /*
+        * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
+        * big.LITTLE). This is not an uncore PMU, and we have taken ctx
+        * sharing into account (e.g. with our pmu::filter_match callback and
+        * pmu::event_init group validation).
+        */
+       cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
+
        return 0;
 
 out_unregister:
index 8fad0a7044d3d332b8733eaff65d2fb33dd46e61..f2201d42a9cdd8a5acde7000127aa29e2cc83f59 100644 (file)
@@ -34,6 +34,9 @@
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
 
+/* Local defines */
+#define MSR_PLATFORM_POWER_LIMIT       0x0000065C
+
 /* bitmasks for RAPL MSRs, used by primitive access functions */
 #define ENERGY_STATUS_MASK      0xffffffff
 
@@ -86,6 +89,7 @@ enum rapl_domain_type {
        RAPL_DOMAIN_PP0, /* core power plane */
        RAPL_DOMAIN_PP1, /* graphics uncore */
        RAPL_DOMAIN_DRAM,/* DRAM control_type */
+       RAPL_DOMAIN_PLATFORM, /* PSys control_type */
        RAPL_DOMAIN_MAX,
 };
 
@@ -251,9 +255,11 @@ static const char * const rapl_domain_names[] = {
        "core",
        "uncore",
        "dram",
+       "psys",
 };
 
 static struct powercap_control_type *control_type; /* PowerCap Controller */
+static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
 
 /* caller to ensure CPU hotplug lock is held */
 static struct rapl_package *find_package_by_id(int id)
@@ -409,6 +415,14 @@ static const struct powercap_zone_ops zone_ops[] = {
                .set_enable = set_domain_enable,
                .get_enable = get_domain_enable,
        },
+       /* RAPL_DOMAIN_PLATFORM */
+       {
+               .get_energy_uj = get_energy_counter,
+               .get_max_energy_range_uj = get_max_energy_counter,
+               .release = release_zone,
+               .set_enable = set_domain_enable,
+               .get_enable = get_domain_enable,
+       },
 };
 
 static int set_power_limit(struct powercap_zone *power_zone, int id,
@@ -1160,6 +1174,13 @@ static int rapl_unregister_powercap(void)
                        powercap_unregister_zone(control_type,
                                                &rd_package->power_zone);
        }
+
+       if (platform_rapl_domain) {
+               powercap_unregister_zone(control_type,
+                                        &platform_rapl_domain->power_zone);
+               kfree(platform_rapl_domain);
+       }
+
        powercap_unregister_control_type(control_type);
 
        return 0;
@@ -1239,6 +1260,47 @@ err_cleanup:
        return ret;
 }
 
+/*
+ * Register the platform ("psys") RAPL domain as a powercap zone.
+ *
+ * Returns -ENODEV when rdmsrl_safe_on_cpu() returns non-zero for either
+ * platform MSR on CPU 0 or the value read is zero, -ENOMEM when the domain
+ * cannot be allocated, or PTR_ERR() of the pointer returned by
+ * powercap_register_zone() when IS_ERR() flags it (the domain is freed in
+ * that case).  On success returns 0 and records the domain in
+ * platform_rapl_domain.
+ */
+static int rapl_register_psys(void)
+{
+       struct rapl_domain *rd;
+       struct powercap_zone *power_zone;
+       u64 val;
+
+       /* Both MSRs must be readable and non-zero, else no PSys domain. */
+       if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
+               return -ENODEV;
+
+       if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
+               return -ENODEV;
+
+       rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+       if (!rd)
+               return -ENOMEM;
+
+       rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
+       rd->id = RAPL_DOMAIN_PLATFORM;
+       rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
+       rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
+       rd->rpl[0].prim_id = PL1_ENABLE;
+       rd->rpl[0].name = pl1_name;
+       rd->rpl[1].prim_id = PL2_ENABLE;
+       rd->rpl[1].name = pl2_name;
+       /*
+        * NOTE(review): the find_package_by_id(0) result is stored without a
+        * NULL check -- confirm package 0 is always present at this point.
+        */
+       rd->rp = find_package_by_id(0);
+
+       /*
+        * The literal 2 presumably is the number of constraints (the two
+        * rpl[] entries set up above) -- confirm against the
+        * powercap_register_zone() prototype.
+        */
+       power_zone = powercap_register_zone(&rd->power_zone, control_type,
+                                           "psys", NULL,
+                                           &zone_ops[RAPL_DOMAIN_PLATFORM],
+                                           2, &constraint_ops);
+
+       if (IS_ERR(power_zone)) {
+               kfree(rd);
+               return PTR_ERR(power_zone);
+       }
+
+       platform_rapl_domain = rd;
+
+       return 0;
+}
+
 static int rapl_register_powercap(void)
 {
        struct rapl_domain *rd;
@@ -1255,6 +1317,10 @@ static int rapl_register_powercap(void)
        list_for_each_entry(rp, &rapl_packages, plist)
                if (rapl_package_register_powercap(rp))
                        goto err_cleanup_package;
+
+       /* Don't bail out if PSys is not supported */
+       rapl_register_psys();
+
        return ret;
 
 err_cleanup_package:
@@ -1289,6 +1355,9 @@ static int rapl_check_domain(int cpu, int domain)
        case RAPL_DOMAIN_DRAM:
                msr = MSR_DRAM_ENERGY_STATUS;
                break;
+       case RAPL_DOMAIN_PLATFORM:
+               /* PSYS(PLATFORM) is not a CPU domain, so avoid printing an error */
+               return -EINVAL;
        default:
                pr_err("invalid domain id %d\n", domain);
                return -EINVAL;
index f291275ffd71730f39dcab3e1fd227110088325a..9e1c3ada91c49b36b643496ffe5fcd37917bb5b8 100644 (file)
@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {
 
 struct perf_callchain_entry {
        __u64                           nr;
-       __u64                           ip[PERF_MAX_STACK_DEPTH];
+       __u64                           ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
 };
 
 struct perf_raw_record {
@@ -151,6 +151,15 @@ struct hw_perf_event {
         */
        struct task_struct              *target;
 
+       /*
+        * PMU would store hardware filter configuration
+        * here.
+        */
+       void                            *addr_filters;
+
+       /* Last sync'ed generation of filters */
+       unsigned long                   addr_filters_gen;
+
 /*
  * hw_perf_event::state flags; used to track the PERF_EF_* state.
  */
@@ -216,6 +225,7 @@ struct perf_event;
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF          0x08
 #define PERF_PMU_CAP_EXCLUSIVE                 0x10
 #define PERF_PMU_CAP_ITRACE                    0x20
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS                0x40
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -240,6 +250,9 @@ struct pmu {
        int                             task_ctx_nr;
        int                             hrtimer_interval_ms;
 
+       /* number of address filters this PMU can do */
+       unsigned int                    nr_addr_filters;
+
        /*
         * Fully disable/enable this PMU, can be used to protect from the PMI
         * as well as for lazy/batch writing of the MSRs.
@@ -392,12 +405,71 @@ struct pmu {
         */
        void (*free_aux)                (void *aux); /* optional */
 
+       /*
+        * Validate address range filters: make sure the HW supports the
+        * requested configuration and number of filters; return 0 if the
+        * supplied filters are valid, -errno otherwise.
+        *
+        * Runs in the context of the ioctl()ing process and is not serialized
+        * with the rest of the PMU callbacks.
+        */
+       int (*addr_filters_validate)    (struct list_head *filters);
+                                       /* optional */
+
+       /*
+        * Synchronize address range filter configuration:
+        * translate hw-agnostic filters into hardware configuration in
+        * event::hw::addr_filters.
+        *
+        * Runs as a part of filter sync sequence that is done in ->start()
+        * callback by calling perf_event_addr_filters_sync().
+        *
+        * May (and should) traverse event::addr_filters::list, for which its
+        * caller provides necessary serialization.
+        */
+       void (*addr_filters_sync)       (struct perf_event *event);
+                                       /* optional */
+
        /*
         * Filter events for PMU-specific reasons.
         */
        int (*filter_match)             (struct perf_event *event); /* optional */
 };
 
+/**
+ * struct perf_addr_filter - address range filter definition
+ * @entry:     event's filter list linkage
+ * @inode:     object file's inode for file-based filters
+ * @offset:    filter range offset
+ * @size:      filter range size
+ * @range:     1: range, 0: address
+ * @filter:    1: filter/start, 0: stop
+ *
+ * This is a hardware-agnostic filter configuration as specified by the user.
+ */
+struct perf_addr_filter {
+       struct list_head        entry;
+       struct inode            *inode;
+       unsigned long           offset;
+       unsigned long           size;
+       unsigned int            range   : 1,
+                               filter  : 1;
+};
+
+/**
+ * struct perf_addr_filters_head - container for address range filters
+ * @list:      list of filters for this event
+ * @lock:      spinlock that serializes accesses to the @list and event's
+ *             (and its children's) filter generations.
+ *
+ * A child event will use parent's @list (and therefore @lock), so they are
+ * bundled together; see perf_event_addr_filters().
+ */
+struct perf_addr_filters_head {
+       struct list_head        list;
+       raw_spinlock_t          lock;
+};
+
 /**
  * enum perf_event_active_state - the states of a event
  */
@@ -566,6 +638,12 @@ struct perf_event {
 
        atomic_t                        event_limit;
 
+       /* address range filters */
+       struct perf_addr_filters_head   addr_filters;
+       /* vma address array for file-based filters */
+       unsigned long                   *addr_filters_offs;
+       unsigned long                   addr_filters_gen;
+
        void (*destroy)(struct perf_event *);
        struct rcu_head                 rcu_head;
 
@@ -834,9 +912,25 @@ extern int perf_event_overflow(struct perf_event *event,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
+extern void perf_event_output_forward(struct perf_event *event,
+                                    struct perf_sample_data *data,
+                                    struct pt_regs *regs);
+extern void perf_event_output_backward(struct perf_event *event,
+                                      struct perf_sample_data *data,
+                                      struct pt_regs *regs);
 extern void perf_event_output(struct perf_event *event,
-                               struct perf_sample_data *data,
-                               struct pt_regs *regs);
+                             struct perf_sample_data *data,
+                             struct pt_regs *regs);
+
+/*
+ * True when @event's overflow_handler is one of the two stock output
+ * routines, perf_event_output_forward() or perf_event_output_backward().
+ * The forward variant is annotated as the likely case.
+ */
+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+       if (likely(event->overflow_handler == perf_event_output_forward))
+               return true;
+       if (unlikely(event->overflow_handler == perf_event_output_backward))
+               return true;
+       return false;
+}
 
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
@@ -977,9 +1071,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
+extern int sysctl_perf_event_max_stack;
+
 static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-       if (entry->nr < PERF_MAX_STACK_DEPTH) {
+       if (entry->nr < sysctl_perf_event_max_stack) {
                entry->ip[entry->nr++] = ip;
                return 0;
        } else {
@@ -1001,6 +1097,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
@@ -1045,8 +1143,41 @@ static inline bool has_aux(struct perf_event *event)
        return event->pmu->setup_aux;
 }
 
+/* True when @event was created with attr.write_backward set. */
+static inline bool is_write_backward(struct perf_event *event)
+{
+       return !!event->attr.write_backward;
+}
+
+/* True when @event's PMU reports a non-zero nr_addr_filters. */
+static inline bool has_addr_filter(struct perf_event *event)
+{
+       return event->pmu->nr_addr_filters;
+}
+
+/*
+ * Return the address filter head to use for @event: an inherited event
+ * (event->parent set) uses its parent's addr_filters, any other event
+ * uses its own.
+ */
+static inline struct perf_addr_filters_head *
+perf_event_addr_filters(struct perf_event *event)
+{
+       struct perf_addr_filters_head *ifh = &event->addr_filters;
+
+       if (event->parent)
+               ifh = &event->parent->addr_filters;
+
+       return ifh;
+}
+
+extern void perf_event_addr_filters_sync(struct perf_event *event);
+
 extern int perf_output_begin(struct perf_output_handle *handle,
                             struct perf_event *event, unsigned int size);
+extern int perf_output_begin_forward(struct perf_output_handle *handle,
+                                   struct perf_event *event,
+                                   unsigned int size);
+extern int perf_output_begin_backward(struct perf_output_handle *handle,
+                                     struct perf_event *event,
+                                     unsigned int size);
+
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
                             const void *buf, unsigned int len);
index 1afe9623c1a7268cd2ef8949a248ef79216fe2e9..43fc8d21347246fbc5f9c8c2213b7fda86632da8 100644 (file)
@@ -340,7 +340,8 @@ struct perf_event_attr {
                                comm_exec      :  1, /* flag comm events that are due to an exec */
                                use_clockid    :  1, /* use @clockid for time fields */
                                context_switch :  1, /* context switch data */
-                               __reserved_1   : 37;
+                               write_backward :  1, /* Write ring buffer from end to beginning */
+                               __reserved_1   : 36;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -401,6 +402,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER      _IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID              _IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF         _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT    _IOW('$', 9, __u32)
 
 enum perf_event_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
index 499d9e933f8e5baab0f3de1199797b56fdc3d589..f5a19548be12ee425ba3a90d1faff12b2dccdb8e 100644 (file)
@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
            value_size < 8 || value_size % 8 ||
-           value_size / 8 > PERF_MAX_STACK_DEPTH)
+           value_size / 8 > sysctl_perf_event_max_stack)
                return ERR_PTR(-EINVAL);
 
        /* hash table size must be power of 2 */
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
        struct perf_callchain_entry *trace;
        struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
        u32 max_depth = map->value_size / 8;
-       /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
-       u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
+       /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+       u32 init_nr = sysctl_perf_event_max_stack - max_depth;
        u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
        u32 hash, id, trace_nr, trace_len;
        bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
                return -EFAULT;
 
        /* get_perf_callchain() guarantees that trace->nr >= init_nr
-        * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
+        * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
         */
        trace_nr = trace->nr - init_nr;
 
index 343c22f5e867de2bbe6c2220535b413a6ea0f4ed..b9325e7dcba1088d74e2502177d2a22ececce4dc 100644 (file)
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
        struct perf_callchain_entry     *cpu_entries[0];
 };
 
+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+/*
+ * Size in bytes of one callchain entry: the struct itself plus
+ * sysctl_perf_event_max_stack __u64 slots.
+ */
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+       size_t slots_bytes = sizeof(__u64) * sysctl_perf_event_max_stack;
+
+       return sizeof(struct perf_callchain_entry) + slots_bytes;
+}
+
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
 static atomic_t nr_callchain_events;
 static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
        if (!entries)
                return -ENOMEM;
 
-       size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+       size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
 
        for_each_possible_cpu(cpu) {
                entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 
        cpu = smp_processor_id();
 
-       return &entries->cpu_entries[cpu][*rctx];
+       return (((void *)entries->cpu_entries[cpu]) +
+               (*rctx * perf_callchain_entry__sizeof()));
 }
 
 static void
@@ -215,3 +224,25 @@ exit_put:
 
        return entry;
 }
+
+/*
+ * sysctl handler for sysctl_perf_event_max_stack.
+ *
+ * The value is parsed into a local copy first; a write is only committed
+ * while no callchain events exist (nr_callchain_events == 0), otherwise
+ * -EBUSY is returned and the current setting is left untouched.
+ */
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+                                void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table tmp = *table;
+       int value = sysctl_perf_event_max_stack;
+       int err;
+
+       /* let proc_dointvec_minmax() operate on the local copy only */
+       tmp.data = &value;
+       err = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+       if (err)
+               return err;
+       if (!write)
+               return 0;
+
+       mutex_lock(&callchain_mutex);
+       if (!atomic_read(&nr_callchain_events))
+               sysctl_perf_event_max_stack = value;
+       else
+               err = -EBUSY;
+       mutex_unlock(&callchain_mutex);
+
+       return err;
+}
index c0ded24166158f26b0c4f3ce1c47dd065e28700c..050a290c72c7b9fd09964f0028aaec11f0f246a1 100644 (file)
@@ -44,6 +44,8 @@
 #include <linux/compat.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/namei.h>
+#include <linux/parser.h>
 
 #include "internal.h"
 
@@ -1927,8 +1929,13 @@ event_sched_in(struct perf_event *event,
        if (event->state <= PERF_EVENT_STATE_OFF)
                return 0;
 
-       event->state = PERF_EVENT_STATE_ACTIVE;
-       event->oncpu = smp_processor_id();
+       WRITE_ONCE(event->oncpu, smp_processor_id());
+       /*
+        * Order event::oncpu write to happen before the ACTIVE state
+        * is visible.
+        */
+       smp_wmb();
+       WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 
        /*
         * Unthrottle events, since we scheduled we might have missed several
@@ -2360,6 +2367,112 @@ void perf_event_enable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
+/* Argument block handed to __perf_event_stop(). */
+struct stop_event_data {
+       struct perf_event       *event;         /* event to stop */
+       unsigned int            restart;        /* non-zero: call pmu->start() again after the stop */
+};
+
+/*
+ * Stop the event described by @info (a struct stop_event_data) on the
+ * current CPU and, if ->restart is set, start it again right away.
+ *
+ * Returns 0 if the event was not ACTIVE or has been stopped here, and
+ * -EAGAIN if the event is ACTIVE but event->oncpu is not this CPU.
+ */
+static int __perf_event_stop(void *info)
+{
+       struct stop_event_data *sd = info;
+       struct perf_event *event = sd->event;
+
+       /* if it's already INACTIVE, do nothing */
+       if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+               return 0;
+
+       /* matches smp_wmb() in event_sched_in() */
+       smp_rmb();
+
+       /*
+        * There is a window with interrupts enabled before we get here,
+        * so we need to check again lest we try to stop another CPU's event.
+        */
+       if (READ_ONCE(event->oncpu) != smp_processor_id())
+               return -EAGAIN;
+
+       event->pmu->stop(event, PERF_EF_UPDATE);
+
+       /*
+        * May race with the actual stop (through perf_pmu_output_stop()),
+        * but it is only used for events with AUX ring buffer, and such
+        * events will refuse to restart because of rb::aux_mmap_count==0,
+        * see comments in perf_aux_output_begin().
+        *
+        * Since this is happening on an event-local CPU, no trace is lost
+        * while restarting.
+        */
+       if (sd->restart)
+               event->pmu->start(event, PERF_EF_START);
+
+       return 0;
+}
+
+/*
+ * Stop and immediately restart @event on the CPU it is running on, by
+ * cross-calling __perf_event_stop() there with ->restart set. The call is
+ * retried for as long as it returns -EAGAIN.
+ *
+ * Returns 0 right away if the event is not ACTIVE, otherwise the result of
+ * the last cpu_function_call().
+ */
+static int perf_event_restart(struct perf_event *event)
+{
+       struct stop_event_data sd = {
+               .event          = event,
+               .restart        = 1,
+       };
+       int ret = 0;
+
+       do {
+               if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+                       return 0;
+
+               /* matches smp_wmb() in event_sched_in() */
+               smp_rmb();
+
+               /*
+                * We only want to restart ACTIVE events, so if the event goes
+                * inactive here (event->oncpu==-1), there's nothing more to do;
+                * fall through with ret==-ENXIO.
+                */
+               ret = cpu_function_call(READ_ONCE(event->oncpu),
+                                       __perf_event_stop, &sd);
+       } while (ret == -EAGAIN);
+
+       return ret;
+}
+
+/*
+ * In order to contain the amount of racy and tricky code in the address
+ * filter configuration management, it is split into a two-part process:
+ *
+ * (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
+ *      we update the addresses of the corresponding vmas in the
+ *      event::addr_filters_offs array and bump event::addr_filters_gen;
+ * (p2) when an event is scheduled in (pmu::add), it calls
+ *      perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
+ *      if the generation has changed since the previous call.
+ *
+ * If (p1) happens while the event is active, we restart it to force (p2).
+ *
+ * (1) perf_event_addr_filters_apply(): adjusting filters' offsets based on
+ *     pre-existing mappings, called once when new filters arrive via the
+ *     SET_FILTER ioctl;
+ * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on a newly
+ *     registered mapping, called for every new mmap(), with mm::mmap_sem down
+ *     for reading;
+ * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the process
+ *     of exec.
+ */
+/*
+ * Hand the current filter addresses to the PMU driver, but only if
+ * event::addr_filters_gen moved since the last sync recorded in
+ * event::hw.addr_filters_gen. Does nothing for events without address
+ * filters.
+ */
+void perf_event_addr_filters_sync(struct perf_event *event)
+{
+       struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+
+       if (has_addr_filter(event)) {
+               raw_spin_lock(&head->lock);
+               if (event->hw.addr_filters_gen != event->addr_filters_gen) {
+                       event->pmu->addr_filters_sync(event);
+                       /* remember which generation the PMU has seen */
+                       event->hw.addr_filters_gen = event->addr_filters_gen;
+               }
+               raw_spin_unlock(&head->lock);
+       }
+}
+EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
        /*
@@ -3209,16 +3322,6 @@ out:
                put_ctx(clone_ctx);
 }
 
-void perf_event_exec(void)
-{
-       int ctxn;
-
-       rcu_read_lock();
-       for_each_task_context_nr(ctxn)
-               perf_event_enable_on_exec(ctxn);
-       rcu_read_unlock();
-}
-
 struct perf_read_data {
        struct perf_event *event;
        bool group;
@@ -3720,6 +3823,9 @@ static bool exclusive_event_installable(struct perf_event *event,
        return true;
 }
 
+static void perf_addr_filters_splice(struct perf_event *event,
+                                      struct list_head *head);
+
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending);
@@ -3747,6 +3853,8 @@ static void _free_event(struct perf_event *event)
        }
 
        perf_event_free_bpf_prog(event);
+       perf_addr_filters_splice(event, NULL);
+       kfree(event->addr_filters_offs);
 
        if (event->destroy)
                event->destroy(event);
@@ -4343,6 +4451,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
        case PERF_EVENT_IOC_SET_BPF:
                return perf_event_set_bpf_prog(event, arg);
 
+       case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+               struct ring_buffer *rb;
+
+               rcu_read_lock();
+               rb = rcu_dereference(event->rb);
+               if (!rb || !rb->nr_pages) {
+                       rcu_read_unlock();
+                       return -EINVAL;
+               }
+               rb_toggle_paused(rb, !!arg);
+               rcu_read_unlock();
+               return 0;
+       }
        default:
                return -ENOTTY;
        }
@@ -4659,6 +4780,8 @@ static void perf_mmap_open(struct vm_area_struct *vma)
                event->pmu->event_mapped(event);
 }
 
+static void perf_pmu_output_stop(struct perf_event *event);
+
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -4686,10 +4809,22 @@ static void perf_mmap_close(struct vm_area_struct *vma)
         */
        if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
            atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+               /*
+                * Stop all AUX events that are writing to this buffer,
+                * so that we can free its AUX pages and corresponding PMU
+                * data. Note that after rb::aux_mmap_count dropped to zero,
+                * they won't start any more (see perf_aux_output_begin()).
+                */
+               perf_pmu_output_stop(event);
+
+               /* now it's safe to free the pages */
                atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
                vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
 
+               /* this has to be the last one */
                rb_free_aux(rb);
+               WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+
                mutex_unlock(&event->mmap_mutex);
        }
 
@@ -5630,9 +5765,13 @@ void perf_prepare_sample(struct perf_event_header *header,
        }
 }
 
-void perf_event_output(struct perf_event *event,
-                       struct perf_sample_data *data,
-                       struct pt_regs *regs)
+static void __always_inline
+__perf_event_output(struct perf_event *event,
+                   struct perf_sample_data *data,
+                   struct pt_regs *regs,
+                   int (*output_begin)(struct perf_output_handle *,
+                                       struct perf_event *,
+                                       unsigned int))
 {
        struct perf_output_handle handle;
        struct perf_event_header header;
@@ -5642,7 +5781,7 @@ void perf_event_output(struct perf_event *event,
 
        perf_prepare_sample(&header, data, event, regs);
 
-       if (perf_output_begin(&handle, event, header.size))
+       if (output_begin(&handle, event, header.size))
                goto exit;
 
        perf_output_sample(&handle, &header, data, event);
@@ -5653,6 +5792,30 @@ exit:
        rcu_read_unlock();
 }
 
+/*
+ * Output a sample for @event, using perf_output_begin_forward() to begin
+ * the ring-buffer write.
+ */
+void
+perf_event_output_forward(struct perf_event *event,
+                        struct perf_sample_data *data,
+                        struct pt_regs *regs)
+{
+       __perf_event_output(event, data, regs, perf_output_begin_forward);
+}
+
+/*
+ * Output a sample for @event, using perf_output_begin_backward() to begin
+ * the ring-buffer write.
+ */
+void
+perf_event_output_backward(struct perf_event *event,
+                          struct perf_sample_data *data,
+                          struct pt_regs *regs)
+{
+       __perf_event_output(event, data, regs, perf_output_begin_backward);
+}
+
+/*
+ * Output a sample for @event, using the generic perf_output_begin() to
+ * begin the ring-buffer write.
+ */
+void
+perf_event_output(struct perf_event *event,
+                 struct perf_sample_data *data,
+                 struct pt_regs *regs)
+{
+       __perf_event_output(event, data, regs, perf_output_begin);
+}
+
 /*
  * read event_id
  */
@@ -5698,15 +5861,18 @@ typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 static void
 perf_event_aux_ctx(struct perf_event_context *ctx,
                   perf_event_aux_output_cb output,
-                  void *data)
+                  void *data, bool all)
 {
        struct perf_event *event;
 
        list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-               if (event->state < PERF_EVENT_STATE_INACTIVE)
-                       continue;
-               if (!event_filter_match(event))
-                       continue;
+               if (!all) {
+                       if (event->state < PERF_EVENT_STATE_INACTIVE)
+                               continue;
+                       if (!event_filter_match(event))
+                               continue;
+               }
+
                output(event, data);
        }
 }
@@ -5717,7 +5883,7 @@ perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
 {
        rcu_read_lock();
        preempt_disable();
-       perf_event_aux_ctx(task_ctx, output, data);
+       perf_event_aux_ctx(task_ctx, output, data, false);
        preempt_enable();
        rcu_read_unlock();
 }
@@ -5747,19 +5913,147 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
                if (cpuctx->unique_pmu != pmu)
                        goto next;
-               perf_event_aux_ctx(&cpuctx->ctx, output, data);
+               perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
                ctxn = pmu->task_ctx_nr;
                if (ctxn < 0)
                        goto next;
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
                if (ctx)
-                       perf_event_aux_ctx(ctx, output, data);
+                       perf_event_aux_ctx(ctx, output, data, false);
 next:
                put_cpu_ptr(pmu->pmu_cpu_context);
        }
        rcu_read_unlock();
 }
 
+/*
+ * At exec, zero the recorded address of every file-based filter of @event;
+ * they will have to be re-instated when/if the objects are mmapped again.
+ * If anything was cleared, the generation is bumped and the event restarted.
+ */
+static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
+{
+       struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+       struct perf_addr_filter *f;
+       unsigned long irqflags;
+       unsigned int idx = 0;
+       bool cleared = false;
+
+       if (!has_addr_filter(event))
+               return;
+
+       raw_spin_lock_irqsave(&head->lock, irqflags);
+       list_for_each_entry(f, &head->list, entry) {
+               /* slot index follows the filter's position in the list */
+               unsigned int slot = idx++;
+
+               if (!f->inode)
+                       continue;
+
+               event->addr_filters_offs[slot] = 0;
+               cleared = true;
+       }
+
+       if (cleared)
+               event->addr_filters_gen++;
+       raw_spin_unlock_irqrestore(&head->lock, irqflags);
+
+       /* the restart is issued only after the filter lock is dropped */
+       if (cleared)
+               perf_event_restart(event);
+}
+
+/*
+ * exec hook: for every task context of current, enable the events marked
+ * enable-on-exec and clear the file-based address filters of all events in
+ * that context.
+ */
+void perf_event_exec(void)
+{
+       struct perf_event_context *ctx;
+       int ctxn;
+
+       rcu_read_lock();
+       for_each_task_context_nr(ctxn) {
+               /*
+                * Fetch the context pointer with rcu_dereference(), the same
+                * way perf_addr_filters_adjust() does, rather than with a
+                * plain load inside the RCU read-side section.
+                */
+               ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+               if (!ctx)
+                       continue;
+
+               perf_event_enable_on_exec(ctxn);
+
+               /* all == true: visit every event regardless of its state */
+               perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+                                  true);
+       }
+       rcu_read_unlock();
+}
+
+/* Argument/result block shared between __perf_pmu_output_stop() and its per-event callback. */
+struct remote_output {
+       struct ring_buffer      *rb;    /* buffer whose writers are to be stopped */
+       int                     err;    /* result of the last __perf_event_stop() call */
+};
+
+/*
+ * Per-event callback: stop @event if it is an AUX event whose output goes
+ * to the ring buffer named in @data (a struct remote_output). The result
+ * of the stop is recorded in ->err.
+ */
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+       struct remote_output *ro = data;
+       struct stop_event_data sd = {
+               .event  = event,
+       };
+       struct perf_event *owner;
+
+       if (!has_aux(event))
+               return;
+
+       /*
+        * In case of inheritance, it will be the parent that links to the
+        * ring-buffer, but it will be the child that's actually using it:
+        */
+       owner = event->parent;
+       if (!owner)
+               owner = event;
+
+       if (rcu_dereference(owner->rb) != ro->rb)
+               return;
+
+       ro->err = __perf_event_stop(&sd);
+}
+
+/*
+ * Cross-call target (see perf_pmu_output_stop()): on the current CPU, stop
+ * every AUX event in the PMU's CPU context and in the current task context
+ * that writes to @info's ring buffer.
+ *
+ * Returns the result of the last __perf_event_stop() issued, or 0 if none
+ * matched.
+ */
+static int __perf_pmu_output_stop(void *info)
+{
+       struct perf_event *event = info;
+       struct pmu *pmu = event->pmu;
+       /*
+        * Use this_cpu_ptr(): get_cpu_ptr() would disable preemption and
+        * nothing here pairs it with put_cpu_ptr(), leaking a preempt count
+        * on every call.
+        */
+       struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+       struct remote_output ro = {
+               .rb     = event->rb,
+       };
+
+       rcu_read_lock();
+       perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+       if (cpuctx->task_ctx)
+               perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+                                  &ro, false);
+       rcu_read_unlock();
+
+       return ro.err;
+}
+
+/*
+ * Stop all events that are writing to @event's ring buffer.
+ *
+ * Walks rb::event_list under RCU and cross-calls __perf_pmu_output_stop()
+ * on the CPU each listed event is bound to or currently running on. If a
+ * cross-call returns -EAGAIN, the whole walk is started over.
+ */
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+       struct perf_event *iter;
+       int err, cpu;
+
+restart:
+       rcu_read_lock();
+       list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+               /*
+                * For per-CPU events, we need to make sure that neither they
+                * nor their children are running; for cpu==-1 events it's
+                * sufficient to stop the event itself if it's active, since
+                * it can't have children.
+                */
+               cpu = iter->cpu;
+               if (cpu == -1)
+                       cpu = READ_ONCE(iter->oncpu);
+
+               /* still -1: not bound to a CPU and no oncpu recorded, nothing to stop */
+               if (cpu == -1)
+                       continue;
+
+               err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+               if (err == -EAGAIN) {
+                       /* leave the RCU section before starting the walk over */
+                       rcu_read_unlock();
+                       goto restart;
+               }
+       }
+       rcu_read_unlock();
+}
+
 /*
  * task tracking -- fork/exit
  *
@@ -6169,6 +6463,87 @@ got_name:
        kfree(buf);
 }
 
+/*
+ * True when @filter has its ->filter flag set and refers to an object file
+ * (->inode is set), i.e. it depends on a dynamic object which is not loaded
+ * yet or whose load addresses are not known.
+ */
+static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
+{
+       if (!filter->filter)
+               return false;
+
+       return filter->inode != NULL;
+}
+
+/*
+ * Does a mapping of @file covering [@offset, @offset + @size] match
+ * @filter? The inode must be the filter's inode and the filter's
+ * [offset, offset + size] range must not lie entirely outside the mapping.
+ */
+static bool perf_addr_filter_match(struct perf_addr_filter *filter,
+                                    struct file *file, unsigned long offset,
+                                    unsigned long size)
+{
+       unsigned long map_end = offset + size;
+       unsigned long filter_end = filter->offset + filter->size;
+
+       return filter->inode == file->f_inode &&
+              filter->offset <= map_end &&
+              filter_end >= offset;
+}
+
+/*
+ * Per-event callback for perf_addr_filters_adjust(); @data is the vma.
+ * Every filter of @event that matches the vma's file range gets the vma's
+ * start address recorded; if any did, the generation is bumped and the
+ * event restarted.
+ */
+static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
+{
+       struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+       struct vm_area_struct *vma = data;
+       struct file *file = vma->vm_file;
+       unsigned long file_off = vma->vm_pgoff << PAGE_SHIFT;
+       unsigned long len = vma->vm_end - vma->vm_start;
+       struct perf_addr_filter *f;
+       unsigned long irqflags;
+       unsigned int idx = 0;
+       bool updated = false;
+
+       /* nothing to do without filters or for non-file mappings */
+       if (!has_addr_filter(event) || !file)
+               return;
+
+       raw_spin_lock_irqsave(&head->lock, irqflags);
+       list_for_each_entry(f, &head->list, entry) {
+               /* slot index follows the filter's position in the list */
+               unsigned int slot = idx++;
+
+               if (!perf_addr_filter_match(f, file, file_off, len))
+                       continue;
+
+               event->addr_filters_offs[slot] = vma->vm_start;
+               updated = true;
+       }
+
+       if (updated)
+               event->addr_filters_gen++;
+       raw_spin_unlock_irqrestore(&head->lock, irqflags);
+
+       /* the restart is issued only after the filter lock is dropped */
+       if (updated)
+               perf_event_restart(event);
+}
+
+/*
+ * Run __perf_addr_filters_adjust() for @vma over every event in each of
+ * current's task contexts.
+ */
+static void perf_addr_filters_adjust(struct vm_area_struct *vma)
+{
+       int ctxn;
+
+       rcu_read_lock();
+       for_each_task_context_nr(ctxn) {
+               struct perf_event_context *ctx;
+
+               ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+               if (ctx)
+                       perf_event_aux_ctx(ctx, __perf_addr_filters_adjust,
+                                          vma, true);
+       }
+       rcu_read_unlock();
+}
+
 void perf_event_mmap(struct vm_area_struct *vma)
 {
        struct perf_mmap_event mmap_event;
@@ -6200,6 +6575,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
                /* .flags (attr_mmap2 only) */
        };
 
+       perf_addr_filters_adjust(vma);
        perf_event_mmap_event(&mmap_event);
 }
 
@@ -6491,10 +6867,7 @@ static int __perf_event_overflow(struct perf_event *event,
                irq_work_queue(&event->pending);
        }
 
-       if (event->overflow_handler)
-               event->overflow_handler(event, data, regs);
-       else
-               perf_event_output(event, data, regs);
+       event->overflow_handler(event, data, regs);
 
        if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
@@ -7081,24 +7454,6 @@ static inline void perf_tp_register(void)
        perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
-       char *filter_str;
-       int ret;
-
-       if (event->attr.type != PERF_TYPE_TRACEPOINT)
-               return -EINVAL;
-
-       filter_str = strndup_user(arg, PAGE_SIZE);
-       if (IS_ERR(filter_str))
-               return PTR_ERR(filter_str);
-
-       ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
-
-       kfree(filter_str);
-       return ret;
-}
-
 static void perf_event_free_filter(struct perf_event *event)
 {
        ftrace_profile_free_filter(event);
@@ -7153,11 +7508,6 @@ static inline void perf_tp_register(void)
 {
 }
 
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
-       return -ENOENT;
-}
-
 static void perf_event_free_filter(struct perf_event *event)
 {
 }
@@ -7185,6 +7535,387 @@ void perf_bp_event(struct perf_event *bp, void *data)
 }
 #endif
 
+/*
+ * Allocate a zeroed address filter on the memory node of @event's CPU
+ * (CPU 0 when event->cpu is -1) and append it to @filters.
+ * Returns the new filter, or NULL if the allocation failed.
+ */
+static struct perf_addr_filter *
+perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
+{
+       struct perf_addr_filter *filter;
+       int cpu = event->cpu;
+
+       if (cpu == -1)
+               cpu = 0;
+
+       filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, cpu_to_node(cpu));
+       if (filter) {
+               INIT_LIST_HEAD(&filter->entry);
+               list_add_tail(&filter->entry, filters);
+       }
+
+       return filter;
+}
+
+/*
+ * Empty @filters: unlink each filter, drop its inode reference if it holds
+ * one, and free it.
+ */
+static void free_filters_list(struct list_head *filters)
+{
+       struct perf_addr_filter *f;
+
+       while (!list_empty(filters)) {
+               f = list_first_entry(filters, struct perf_addr_filter, entry);
+               list_del(&f->entry);
+
+               if (f->inode)
+                       iput(f->inode);
+               kfree(f);
+       }
+}
+
+/*
+ * Detach and free @event's current address filters; if @head is non-NULL,
+ * move the filters on @head onto the event in their place.
+ */
+static void perf_addr_filters_splice(struct perf_event *event,
+                                    struct list_head *head)
+{
+       LIST_HEAD(old);
+       unsigned long irqflags;
+
+       /* children don't have their own filters, so there is nothing to do for them */
+       if (!has_addr_filter(event) || event->parent)
+               return;
+
+       raw_spin_lock_irqsave(&event->addr_filters.lock, irqflags);
+       list_splice_init(&event->addr_filters.list, &old);
+       if (head)
+               list_splice(head, &event->addr_filters.list);
+       raw_spin_unlock_irqrestore(&event->addr_filters.lock, irqflags);
+
+       /* the old filters are freed after the lock has been released */
+       free_filters_list(&old);
+}
+
+/*
+ * Walk @mm's vma list and return the start address of the first
+ * file-backed vma that matches @filter, or 0 if there is none.
+ * Called with mm::mmap_sem down for reading.
+ */
+static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
+                                           struct mm_struct *mm)
+{
+       struct vm_area_struct *vma;
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->vm_file &&
+                   perf_addr_filter_match(filter, vma->vm_file,
+                                          vma->vm_pgoff << PAGE_SHIFT,
+                                          vma->vm_end - vma->vm_start))
+                       return vma->vm_start;
+       }
+
+       return 0;
+}
+
+/*
+ * Update event's address range filters based on the
+ * task's existing mappings, if any.
+ *
+ * Every slot of event::addr_filters_offs is reset to 0 and, for filters
+ * that depend on a mapped object, set to the start of the matching vma.
+ * The generation counter is bumped and the event restarted.
+ */
+static void perf_event_addr_filters_apply(struct perf_event *event)
+{
+       struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+       struct task_struct *task = READ_ONCE(event->ctx->task);
+       struct perf_addr_filter *filter;
+       struct mm_struct *mm = NULL;
+       unsigned int count = 0;
+       unsigned long flags;
+
+       /*
+        * We may observe TASK_TOMBSTONE, which means that the event tear-down
+        * will stop on the parent's child_mutex that our caller is also holding
+        */
+       if (task == TASK_TOMBSTONE)
+               return;
+
+       /*
+        * Use the snapshot that was just checked: re-reading
+        * event->ctx->task here could yield TASK_TOMBSTONE after all.
+        */
+       mm = get_task_mm(task);
+       if (!mm)
+               goto restart;
+
+       down_read(&mm->mmap_sem);
+
+       raw_spin_lock_irqsave(&ifh->lock, flags);
+       list_for_each_entry(filter, &ifh->list, entry) {
+               event->addr_filters_offs[count] = 0;
+
+               if (perf_addr_filter_needs_mmap(filter))
+                       event->addr_filters_offs[count] =
+                               perf_addr_filter_apply(filter, mm);
+
+               count++;
+       }
+
+       event->addr_filters_gen++;
+       raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+       up_read(&mm->mmap_sem);
+
+       mmput(mm);
+
+restart:
+       perf_event_restart(event);
+}
+
+/*
+ * Address range filtering: limiting the data to certain
+ * instruction address ranges. Filters are ioctl()ed to us from
+ * userspace as ascii strings.
+ *
+ * Filter string format:
+ *
+ * ACTION RANGE_SPEC
+ * where ACTION is one of the
+ *  * "filter": limit the trace to this region
+ *  * "start": start tracing from this address
+ *  * "stop": stop tracing at this address/region;
+ * RANGE_SPEC is
+ *  * for kernel addresses: <start address>[/<size>]
+ *  * for object files:     <start address>[/<size>]@</path/to/object/file>
+ *
+ * if <size> is not specified, the range is treated as a single address.
+ */
+/* Token ids used in if_tokens: three ACTION keywords, then four RANGE_SPEC forms. */
+enum {
+       IF_ACT_FILTER,
+       IF_ACT_START,
+       IF_ACT_STOP,
+       IF_SRC_FILE,
+       IF_SRC_KERNEL,
+       IF_SRC_FILEADDR,
+       IF_SRC_KERNELADDR,
+};
+
+/* States of the parser in perf_event_parse_addr_filter(). */
+enum {
+       IF_STATE_ACTION = 0,    /* expecting an ACTION token */
+       IF_STATE_SOURCE,        /* ACTION seen, expecting a RANGE_SPEC token */
+       IF_STATE_END,           /* one filter definition fully parsed */
+};
+
+/* match_token() patterns for the ACTION keywords and the RANGE_SPEC forms. */
+static const match_table_t if_tokens = {
+       { IF_ACT_FILTER,        "filter" },
+       { IF_ACT_START,         "start" },
+       { IF_ACT_STOP,          "stop" },
+       { IF_SRC_FILE,          "%u/%u@%s" },   /* <start>/<size>@<path> */
+       { IF_SRC_KERNEL,        "%u/%u" },      /* <start>/<size> */
+       { IF_SRC_FILEADDR,      "%u@%s" },      /* <start>@<path> */
+       { IF_SRC_KERNELADDR,    "%u" },         /* <start> */
+};
+
+/*
+ * Address filter string parser
+ *
+ * Works on a private copy of @fstr, split on spaces, commas and newlines.
+ * Each filter is an ACTION token followed by a RANGE_SPEC token; a filter
+ * is allocated onto @filters as soon as its ACTION token is reached.
+ *
+ * Returns 0 on success. On failure every filter already added to @filters
+ * is freed and a negative errno is returned.
+ */
+static int
+perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
+                            struct list_head *filters)
+{
+       struct perf_addr_filter *filter = NULL;
+       char *start, *orig, *filename = NULL;
+       struct path path;
+       substring_t args[MAX_OPT_ARGS];
+       int state = IF_STATE_ACTION, token;
+       unsigned int kernel = 0;
+       int ret = -EINVAL;
+
+       orig = fstr = kstrdup(fstr, GFP_KERNEL);
+       if (!fstr)
+               return -ENOMEM;
+
+       while ((start = strsep(&fstr, " ,\n")) != NULL) {
+               ret = -EINVAL;
+
+               /* skip empty tokens produced by consecutive separators */
+               if (!*start)
+                       continue;
+
+               /* filter definition begins */
+               if (state == IF_STATE_ACTION) {
+                       filter = perf_addr_filter_new(event, filters);
+                       if (!filter)
+                               goto fail;
+               }
+
+               token = match_token(start, if_tokens, args);
+               switch (token) {
+               case IF_ACT_FILTER:
+               case IF_ACT_START:
+                       filter->filter = 1;
+                       /* fall through */
+
+               case IF_ACT_STOP:
+                       if (state != IF_STATE_ACTION)
+                               goto fail;
+
+                       state = IF_STATE_SOURCE;
+                       break;
+
+               case IF_SRC_KERNELADDR:
+               case IF_SRC_KERNEL:
+                       kernel = 1;
+                       /* fall through */
+
+               case IF_SRC_FILEADDR:
+               case IF_SRC_FILE:
+                       if (state != IF_STATE_SOURCE)
+                               goto fail;
+
+                       if (token == IF_SRC_FILE || token == IF_SRC_KERNEL)
+                               filter->range = 1;
+
+                       /* terminate the number in place so kstrtoul() accepts it */
+                       *args[0].to = 0;
+                       ret = kstrtoul(args[0].from, 0, &filter->offset);
+                       if (ret)
+                               goto fail;
+
+                       if (filter->range) {
+                               *args[1].to = 0;
+                               ret = kstrtoul(args[1].from, 0, &filter->size);
+                               if (ret)
+                                       goto fail;
+                       }
+
+                       /*
+                        * Both file forms carry a path: it is the third
+                        * match for "%u/%u@%s" and the second for "%u@%s".
+                        */
+                       if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+                               int fpos = token == IF_SRC_FILE ? 2 : 1;
+
+                               filename = match_strdup(&args[fpos]);
+                               if (!filename) {
+                                       ret = -ENOMEM;
+                                       goto fail;
+                               }
+                       }
+
+                       state = IF_STATE_END;
+                       break;
+
+               default:
+                       goto fail;
+               }
+
+               /*
+                * Filter definition is fully parsed, validate and install it.
+                * Make sure that it doesn't contradict itself or the event's
+                * attribute.
+                */
+               if (state == IF_STATE_END) {
+                       /* ret is 0 from kstrtoul(); failures below must not report success */
+                       ret = -EINVAL;
+
+                       if (kernel && event->attr.exclude_kernel)
+                               goto fail;
+
+                       if (!kernel) {
+                               if (!filename)
+                                       goto fail;
+
+                               /* look up the path and grab its inode */
+                               ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+                               if (ret)
+                                       goto fail;
+
+                               filter->inode = igrab(d_inode(path.dentry));
+                               path_put(&path);
+                               kfree(filename);
+                               filename = NULL;
+
+                               ret = -EINVAL;
+                               if (!filter->inode ||
+                                   !S_ISREG(filter->inode->i_mode))
+                                       /* free_filters_list() will iput() */
+                                       goto fail;
+                       }
+
+                       /* ready to consume more filters */
+                       state = IF_STATE_ACTION;
+                       filter = NULL;
+                       /* the next definition starts out as a non-kernel one */
+                       kernel = 0;
+               }
+       }
+
+       /* an ACTION without its RANGE_SPEC is an incomplete definition */
+       if (state != IF_STATE_ACTION)
+               goto fail;
+
+       kfree(orig);
+
+       return 0;
+
+fail:
+       /* filename is NULL unless a lookup was still pending; kfree(NULL) is a no-op */
+       kfree(filename);
+       free_filters_list(filters);
+       kfree(orig);
+
+       return ret;
+}
+
+/*
+ * Parse @filter_str, have the PMU validate the result and, if it is
+ * acceptable, replace @event's address filters with it and apply the new
+ * filters to the event and its children.
+ *
+ * Returns 0 on success, -EINVAL for a child event, -EOPNOTSUPP for an event
+ * without a task context, or the parser's/PMU's error code.
+ */
+static int
+perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
+{
+       LIST_HEAD(parsed);
+       int err;
+
+       /*
+        * Since this is called in perf_ioctl() path, we're already holding
+        * ctx::mutex.
+        */
+       lockdep_assert_held(&event->ctx->mutex);
+
+       if (WARN_ON_ONCE(event->parent))
+               return -EINVAL;
+
+       /*
+        * For now, we only support filtering in per-task events; doing so
+        * for CPU-wide events requires additional context switching trickery,
+        * since same object code will be mapped at different virtual
+        * addresses in different processes.
+        */
+       if (!event->ctx->task)
+               return -EOPNOTSUPP;
+
+       err = perf_event_parse_addr_filter(event, filter_str, &parsed);
+       if (err)
+               return err;
+
+       err = event->pmu->addr_filters_validate(&parsed);
+       if (!err) {
+               /* remove existing filters, if any */
+               perf_addr_filters_splice(event, &parsed);
+
+               /* install new filters */
+               perf_event_for_each_child(event, perf_event_addr_filters_apply);
+       } else {
+               free_filters_list(&parsed);
+       }
+
+       return err;
+}
+
+/*
+ * SET_FILTER backend: copy the filter string from userspace and hand it to
+ * the tracepoint filter code (tracepoint events, CONFIG_EVENT_TRACING only)
+ * or to the address filter code (events with address filters).
+ * Any other event gets -EINVAL.
+ */
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+       char *str;
+       int err = -EINVAL;
+
+       if (!(event->attr.type == PERF_TYPE_TRACEPOINT &&
+             IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+           !has_addr_filter(event))
+               return -EINVAL;
+
+       str = strndup_user(arg, PAGE_SIZE);
+       if (IS_ERR(str))
+               return PTR_ERR(str);
+
+       if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
+           event->attr.type == PERF_TYPE_TRACEPOINT) {
+               err = ftrace_profile_set_filter(event, event->attr.config,
+                                               str);
+       } else if (has_addr_filter(event)) {
+               err = perf_event_set_addr_filter(event, str);
+       }
+
+       kfree(str);
+       return err;
+}
+
 /*
  * hrtimer based swevent callback
  */
@@ -7542,6 +8273,20 @@ static void free_pmu_context(struct pmu *pmu)
 out:
        mutex_unlock(&pmus_lock);
 }
+
+/*
+ * Let userspace know that this PMU supports address range filtering:
+ */
+static ssize_t nr_addr_filters_show(struct device *dev,
+                                   struct device_attribute *attr,
+                                   char *page)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
+}
+DEVICE_ATTR_RO(nr_addr_filters);
+
 static struct idr pmu_idr;
 
 static ssize_t
@@ -7643,9 +8388,19 @@ static int pmu_dev_alloc(struct pmu *pmu)
        if (ret)
                goto free_dev;
 
+       /* For PMUs with address filters, throw in an extra attribute: */
+       if (pmu->nr_addr_filters)
+               ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
+
+       if (ret)
+               goto del_dev;
+
 out:
        return ret;
 
+del_dev:
+       device_del(pmu->dev);
+
 free_dev:
        put_device(pmu->dev);
        goto out;
@@ -7685,6 +8440,21 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
        }
 
 skip_type:
+       if (pmu->task_ctx_nr == perf_hw_context) {
+               static int hw_context_taken = 0;
+
+               /*
+                * Other than systems with heterogeneous CPUs, it never makes
+                * sense for two PMUs to share perf_hw_context. PMUs which are
+                * uncore must use perf_invalid_context.
+                */
+               if (WARN_ON_ONCE(hw_context_taken &&
+                   !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
+                       pmu->task_ctx_nr = perf_invalid_context;
+
+               hw_context_taken = 1;
+       }
+
        pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
        if (pmu->pmu_cpu_context)
                goto got_cpu_context;
@@ -7772,6 +8542,8 @@ void perf_pmu_unregister(struct pmu *pmu)
        free_percpu(pmu->pmu_disable_count);
        if (pmu->type >= PERF_TYPE_MAX)
                idr_remove(&pmu_idr, pmu->type);
+       if (pmu->nr_addr_filters)
+               device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
        device_del(pmu->dev);
        put_device(pmu->dev);
        free_pmu_context(pmu);
@@ -7965,6 +8737,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        INIT_LIST_HEAD(&event->sibling_list);
        INIT_LIST_HEAD(&event->rb_entry);
        INIT_LIST_HEAD(&event->active_entry);
+       INIT_LIST_HEAD(&event->addr_filters.list);
        INIT_HLIST_NODE(&event->hlist_entry);
 
 
@@ -7972,6 +8745,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        init_irq_work(&event->pending, perf_pending_event);
 
        mutex_init(&event->mmap_mutex);
+       raw_spin_lock_init(&event->addr_filters.lock);
 
        atomic_long_set(&event->refcount, 1);
        event->cpu              = cpu;
@@ -8006,8 +8780,16 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                context = parent_event->overflow_handler_context;
        }
 
-       event->overflow_handler = overflow_handler;
-       event->overflow_handler_context = context;
+       if (overflow_handler) {
+               event->overflow_handler = overflow_handler;
+               event->overflow_handler_context = context;
+       } else if (is_write_backward(event)){
+               event->overflow_handler = perf_event_output_backward;
+               event->overflow_handler_context = NULL;
+       } else {
+               event->overflow_handler = perf_event_output_forward;
+               event->overflow_handler_context = NULL;
+       }
 
        perf_event__state_init(event);
 
@@ -8048,11 +8830,22 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        if (err)
                goto err_pmu;
 
+       if (has_addr_filter(event)) {
+               event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
+                                                  sizeof(unsigned long),
+                                                  GFP_KERNEL);
+               if (!event->addr_filters_offs)
+                       goto err_per_task;
+
+               /* force hw sync on the address filters */
+               event->addr_filters_gen = 1;
+       }
+
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                        err = get_callchain_buffers();
                        if (err)
-                               goto err_per_task;
+                               goto err_addr_filters;
                }
        }
 
@@ -8061,6 +8854,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        return event;
 
+err_addr_filters:
+       kfree(event->addr_filters_offs);
+
 err_per_task:
        exclusive_event_destroy(event);
 
@@ -8239,6 +9035,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
        if (output_event->clock != event->clock)
                goto out;
 
+       /*
+        * Either writing ring buffer from beginning or from end.
+        * Mixing is not allowed.
+        */
+       if (is_write_backward(output_event) != is_write_backward(event))
+               goto out;
+
        /*
         * If both events generate aux data, they must be on the same PMU
         */
index 4199b6d193f58143ec8ccfe16a055a0c5d54608c..05f9f6d626df153457fd79c360625bc860bb9bc3 100644 (file)
 struct ring_buffer {
        atomic_t                        refcount;
        struct rcu_head                 rcu_head;
-       struct irq_work                 irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
        struct work_struct              work;
        int                             page_order;     /* allocation order  */
 #endif
        int                             nr_pages;       /* nr of data pages  */
        int                             overwrite;      /* can overwrite itself */
+       int                             paused;         /* can write into ring buffer */
 
        atomic_t                        poll;           /* POLL_ for wakeups */
 
@@ -65,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
        rb_free(rb);
 }
 
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+       if (!pause && rb->nr_pages)
+               rb->paused = 0;
+       else
+               rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
index c61f0cbd308b5b4456e69c1539f9b334a34c0632..c49bab42dc574d47a26bf0c52a6e2690161729d0 100644 (file)
@@ -102,8 +102,21 @@ out:
        preempt_enable();
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-                     struct perf_event *event, unsigned int size)
+static bool __always_inline
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+                     unsigned long data_size, unsigned int size,
+                     bool backward)
+{
+       if (!backward)
+               return CIRC_SPACE(head, tail, data_size) >= size;
+       else
+               return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
+static int __always_inline
+__perf_output_begin(struct perf_output_handle *handle,
+                   struct perf_event *event, unsigned int size,
+                   bool backward)
 {
        struct ring_buffer *rb;
        unsigned long tail, offset, head;
@@ -125,8 +138,11 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (unlikely(!rb))
                goto out;
 
-       if (unlikely(!rb->nr_pages))
+       if (unlikely(rb->paused)) {
+               if (rb->nr_pages)
+                       local_inc(&rb->lost);
                goto out;
+       }
 
        handle->rb    = rb;
        handle->event = event;
@@ -143,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
        do {
                tail = READ_ONCE(rb->user_page->data_tail);
                offset = head = local_read(&rb->head);
-               if (!rb->overwrite &&
-                   unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-                       goto fail;
+               if (!rb->overwrite) {
+                       if (unlikely(!ring_buffer_has_space(head, tail,
+                                                           perf_data_size(rb),
+                                                           size, backward)))
+                               goto fail;
+               }
 
                /*
                 * The above forms a control dependency barrier separating the
@@ -159,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
                 * See perf_output_put_handle().
                 */
 
-               head += size;
+               if (!backward)
+                       head += size;
+               else
+                       head -= size;
        } while (local_cmpxchg(&rb->head, offset, head) != offset);
 
+       if (backward) {
+               offset = head;
+               head = (u64)(-head);
+       }
+
        /*
         * We rely on the implied barrier() by local_cmpxchg() to ensure
         * none of the data stores below can be lifted up by the compiler.
@@ -203,6 +230,26 @@ out:
        return -ENOSPC;
 }
 
+int perf_output_begin_forward(struct perf_output_handle *handle,
+                            struct perf_event *event, unsigned int size)
+{
+       return __perf_output_begin(handle, event, size, false);
+}
+
+int perf_output_begin_backward(struct perf_output_handle *handle,
+                              struct perf_event *event, unsigned int size)
+{
+       return __perf_output_begin(handle, event, size, true);
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+                     struct perf_event *event, unsigned int size)
+{
+
+       return __perf_output_begin(handle, event, size,
+                                  unlikely(is_write_backward(event)));
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
                      const void *buf, unsigned int len)
 {
@@ -221,8 +268,6 @@ void perf_output_end(struct perf_output_handle *handle)
        rcu_read_unlock();
 }
 
-static void rb_irq_work(struct irq_work *work);
-
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -243,16 +288,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 
        INIT_LIST_HEAD(&rb->event_list);
        spin_lock_init(&rb->event_lock);
-       init_irq_work(&rb->irq_work, rb_irq_work);
-}
 
-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
-       if (!atomic_dec_and_test(&rb->refcount))
-               return;
-
-       rb->rcu_head.next = (void *)rb;
-       irq_work_queue(&rb->irq_work);
+       /*
+        * perf_output_begin() only checks rb->paused, therefore
+        * rb->paused must be true if we have no pages for output.
+        */
+       if (!rb->nr_pages)
+               rb->paused = 1;
 }
 
 /*
@@ -264,6 +306,10 @@ static void ring_buffer_put_async(struct ring_buffer *rb)
  * The ordering is similar to that of perf_output_{begin,end}, with
  * the exception of (B), which should be taken care of by the pmu
  * driver, since ordering rules will differ depending on hardware.
+ *
+ * Call this from pmu::start(); see the comment in perf_aux_output_end()
+ * about its use in pmu callbacks. Both can also be called from the PMI
+ * handler if needed.
  */
 void *perf_aux_output_begin(struct perf_output_handle *handle,
                            struct perf_event *event)
@@ -287,6 +333,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
        if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
                goto err;
 
+       /*
+        * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
+        * the aux buffer is in perf_mmap_close(), about to get freed.
+        */
+       if (!atomic_read(&rb->aux_mmap_count))
+               goto err_put;
+
        /*
         * Nesting is not supported for AUX area, make sure nested
         * writers are caught early
@@ -328,10 +381,11 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
        return handle->rb->aux_priv;
 
 err_put:
+       /* can't be last */
        rb_free_aux(rb);
 
 err:
-       ring_buffer_put_async(rb);
+       ring_buffer_put(rb);
        handle->event = NULL;
 
        return NULL;
@@ -342,6 +396,10 @@ err:
  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
  * pmu driver's responsibility to observe ordering rules of the hardware,
  * so that all the data is externally visible before this is called.
+ *
+ * Note: this has to be called from pmu::stop() callback, as the assumption
+ * of the AUX buffer management code is that after pmu::stop(), the AUX
+ * transaction must be stopped and therefore drop the AUX reference count.
  */
 void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
                         bool truncated)
@@ -381,8 +439,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
        handle->event = NULL;
 
        local_set(&rb->aux_nest, 0);
+       /* can't be last */
        rb_free_aux(rb);
-       ring_buffer_put_async(rb);
+       ring_buffer_put(rb);
 }
 
 /*
@@ -463,6 +522,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
 {
        int pg;
 
+       /*
+        * Should never happen, the last reference should be dropped from
+        * perf_mmap_close() path, which first stops aux transactions (which
+        * in turn are the atomic holders of aux_refcount) and then does the
+        * last rb_free_aux().
+        */
+       WARN_ON_ONCE(in_atomic());
+
        if (rb->aux_priv) {
                rb->free_aux(rb->aux_priv);
                rb->free_aux = NULL;
@@ -574,18 +641,7 @@ out:
 void rb_free_aux(struct ring_buffer *rb)
 {
        if (atomic_dec_and_test(&rb->aux_refcount))
-               irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
-       struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
-       if (!atomic_read(&rb->aux_refcount))
                __rb_free_aux(rb);
-
-       if (rb->rcu_head.next == (void *)rb)
-               call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
index 725587f10667eef64326263941bea17b7fb82cf1..c8b318663525d02b2098238341aca72c701966fe 100644 (file)
@@ -130,6 +130,9 @@ static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+static int six_hundred_forty_kb = 640 * 1024;
+#endif
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one_hundred,
        },
+       {
+               .procname       = "perf_event_max_stack",
+               .data           = NULL, /* filled in by handler */
+               .maxlen         = sizeof(sysctl_perf_event_max_stack),
+               .mode           = 0644,
+               .proc_handler   = perf_event_max_stack_handler,
+               .extra1         = &zero,
+               .extra2         = &six_hundred_forty_kb,
+       },
 #endif
 #ifdef CONFIG_KMEMCHECK
        {
index 00df25fd86ef458b4ee23d645efda32426af2568..e11108f1d19735026e66fe7336650a9c7559501e 100644 (file)
@@ -47,6 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
                if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
+               if (!is_sampling_event(p_event))
+                       return 0;
+
                /*
                 * We don't allow user space callchains for  function trace
                 * event, due to issues with page faults while tracing page
index 60c7e6c8ff178c3cd6d1c7a9b04ae4b7fc00684f..6bf68fe7dd290dfeb9f51969f6b0d6d0cb38ab4c 100644 (file)
@@ -137,7 +137,8 @@ libsubcmd_clean:
        $(call descend,lib/subcmd,clean)
 
 perf_clean:
-       $(call descend,$(@:_clean=),clean)
+       $(Q)mkdir -p $(PERF_O) .
+       $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
 
 selftests_clean:
        $(call descend,testing/$(@:_clean=),clean)
index 6b7707270aa3b19791c8b6248f90ecddeabc1fdd..9f878619077aeb9ecbb50f87b381706d083def37 100644 (file)
@@ -30,6 +30,7 @@ endef
 FEATURE_TESTS_BASIC :=                 \
        backtrace                       \
        dwarf                           \
+       dwarf_getlocations              \
        fortify-source                  \
        sync-compare-and-swap           \
        glibc                           \
@@ -78,6 +79,7 @@ endif
 
 FEATURE_DISPLAY ?=                     \
        dwarf                           \
+       dwarf_getlocations              \
        glibc                           \
        gtk2                            \
        libaudit                        \
index c5f4c417428d7099fbe4f487a179b0663f478611..4ae94dbfdab98d5181e18d9d8864a24f942bdc53 100644 (file)
@@ -3,6 +3,7 @@ FILES=                                  \
        test-backtrace.bin              \
        test-bionic.bin                 \
        test-dwarf.bin                  \
+       test-dwarf_getlocations.bin     \
        test-fortify-source.bin         \
        test-sync-compare-and-swap.bin  \
        test-glibc.bin                  \
@@ -82,6 +83,9 @@ endif
 $(OUTPUT)test-dwarf.bin:
        $(BUILD) $(DWARFLIBS)
 
+$(OUTPUT)test-dwarf_getlocations.bin:
+       $(BUILD) $(DWARFLIBS)
+
 $(OUTPUT)test-libelf-mmap.bin:
        $(BUILD) -lelf
 
index e499a36c1e4a9e21e9c355309b53a7dc5901664a..a282e8cb84f308da358983ebccbf80c612e7d061 100644 (file)
 # include "test-dwarf.c"
 #undef main
 
+#define main main_test_dwarf_getlocations
+# include "test-dwarf_getlocations.c"
+#undef main
+
 #define main main_test_libelf_getphdrnum
 # include "test-libelf-getphdrnum.c"
 #undef main
@@ -143,6 +147,7 @@ int main(int argc, char *argv[])
        main_test_libelf_mmap();
        main_test_glibc();
        main_test_dwarf();
+       main_test_dwarf_getlocations();
        main_test_libelf_getphdrnum();
        main_test_libunwind();
        main_test_libaudit();
index b389026839b97c0e35738f52564e87036fb79735..e04ab89a1013bdee0b4d0e1f887c00fa0f521aec 100644 (file)
@@ -27,10 +27,9 @@ int main(void)
        attr.log_level = 0;
        attr.kern_version = 0;
 
-       attr = attr;
        /*
         * Test existence of __NR_bpf and BPF_PROG_LOAD.
         * This call should fail if we run the testcase.
         */
-       return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
+       return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 }
diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c
new file mode 100644 (file)
index 0000000..7016269
--- /dev/null
@@ -0,0 +1,12 @@
+#include <stdlib.h>
+#include <elfutils/libdw.h>
+
+int main(void)
+{
+       Dwarf_Addr base, start, end;
+       Dwarf_Attribute attr;
+       Dwarf_Op *op;
+        size_t nops;
+       ptrdiff_t offset = 0;
+        return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops);
+}
index ef78c22ff44d4142f01a1f5aea3bbe4e15fd0101..08556cf2c70d400666de1b4b2db6d7d2d05c0e04 100644 (file)
@@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
        return err;
 }
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+       char path[PATH_MAX];
+       const char *procfs = procfs__mountpoint();
+
+       if (!procfs)
+               return -1;
+
+       snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+       return filename__read_str(path, buf, sizep);
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
        char path[PATH_MAX];
index 9f6598098dc5804a5bf660013ad5b07c499b6169..16c9c2ed7c5bfb85e270399f4d6a7253d9a367b3 100644 (file)
@@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
index be764f9ec7691a3d2357214cbe1af9c6c333ad92..c6c8318e38a2efbf52d264b4bf945aefba40272a 100644 (file)
@@ -672,6 +672,7 @@ The letters are:
        d       create a debug log
        g       synthesize a call chain (use with i or x)
        l       synthesize last branch entries (use with i or x)
+       s       skip initial number of events
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -730,6 +731,12 @@ from one sample to the next.
 
 To disable trace decoding entirely, use the option --no-itrace.
 
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+       --itrace=i0nss1000000
+
+skips the first million instructions.
 
 dump option
 -----------
index 65453f4c700604f8a259df384c6e52988ce81b14..e2a4c5e0dbe5b078a4a54b38a65007d00b06c94a 100644 (file)
@@ -7,6 +7,7 @@
                d       create a debug log
                g       synthesize a call chain (use with i or x)
                l       synthesize last branch entries (use with i or x)
+               s       skip initial number of events
 
        The default is all events i.e. the same as --itrace=ibxe
 
 
        Also the number of last branch entries (default 64, max. 1024) for
        instructions or transactions events can be specified.
+
+       It is also possible to skip events generated (instructions, branches, transactions)
+       at the beginning. This is useful to ignore initialization code.
+
+       --itrace=i0nss1000000
+
+       skips the first million instructions.
index e9cd39a92dc220dcfb4cef0ee0a8c98dc4b93199..778f54d4d0bd7516c3342d2fdda129138bc28779 100644 (file)
@@ -33,7 +33,7 @@ OPTIONS
 
 -f::
 --force::
-        Don't complain, do it.
+        Don't do ownership validation.
 
 -v::
 --verbose::
index d1deb573877fe5d84a8a7a7a63fe700585e4d8d5..3e9490b9c5334486d9787d52e531e1002c917c47 100644 (file)
@@ -75,7 +75,7 @@ OPTIONS
 
 -f::
 --force::
-       Don't complain, do it.
+        Don't do ownership validation.
 
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
index ec723d0a5bb3fda751bf66491c736db10e42cde3..a126e97a81143b6492d0ad9ca7b8648a857cf492 100644 (file)
@@ -93,6 +93,67 @@ raw encoding of 0x1A8 can be used:
 You should refer to the processor specific documentation for getting these
 details. Some of them are referenced in the SEE ALSO section below.
 
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor specific documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+  ls /sys/devices/*/format
+
+For example, the raw "LSD.UOPS" core PMU event above could
+be specified as
+
+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of an Intel Xeon system
+
+  perf stat -C 0 -a -e uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU.  Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together.
+
+This example measures the combined core power every second
+
+  perf stat -I 1000 -e power/energy-cores/  -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non root users generally only context switched PMU events are available.
+This is normally only the events in the cpu PMU, the predefined events
+like cycles and instructions and some software events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overridden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non root to use these events.
+
+For accessing trace point events perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
+that allows low overhead execution tracing.  These are described in a separate
+intel-pt.txt document.
+
 PARAMETERIZED EVENTS
 --------------------
 
@@ -106,6 +167,50 @@ also be supplied. For example:
 
   perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
 
+EVENT GROUPS
+------------
+
+Perf supports time based multiplexing of events, when the number of events
+active exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+  perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depends on the CPU. A group
+cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can schedule, and may not support multiple instances in a single group.
+When too many events are specified in the group, none of them will
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by default.
+The NMI watchdog can be disabled as root with
+
+       echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+  perf record -e '{cycles,instructions}:S' ...
+  perf report --group
+
+Normally all events in an event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
 OPTIONS
 -------
 
@@ -143,5 +248,5 @@ SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-top[1],
 linkperf:perf-record[1],
-http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
 http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
index 43310d8661fedfbee4e24f8b803acdd91fe49d56..1d6092c460dd085401cbe084fed2166d84de105c 100644 (file)
@@ -48,6 +48,14 @@ OPTIONS
        option can be passed in record mode. It will be interpreted the same way as perf
        record.
 
+-K::
+--all-kernel::
+       Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+       Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
index 19aa17532a16709646dc52487c5f686675c2b658..8dbee832abd9e04cb6a8b5dead675fa27b0f39bb 100644 (file)
@@ -347,6 +347,19 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--timestamp-filename::
+Append timestamp to output file name.
+
+--switch-output::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+when receiving a SIGUSR2.
+
+A possible use case is to, given an external event, slice the perf.data file
+that gets then processed, possibly via a perf script, to decide if that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 12113992ac9d0f5ceca0b003cec568717c4208e5..ebaf849e30efd15bce04edb37252fb4aed66441f 100644 (file)
@@ -248,7 +248,7 @@ OPTIONS
        Note that when using the --itrace option the synthesized callchain size
        will override this value if the synthesized callchain size is bigger.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 -G::
 --inverted::
@@ -285,7 +285,7 @@ OPTIONS
 
 -f::
 --force::
-        Don't complain, do it.
+        Don't do ownership validation.
 
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
index 8ff4df95695128259abbd60e111283770f3a4b76..1cc08cc47ac534b169d6cbce5eaf632f5954e1b1 100644 (file)
@@ -50,6 +50,22 @@ OPTIONS
 --dump-raw-trace=::
         Display verbose dump of the sched data.
 
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+       Show only CPUs with activity. Helps visualizing on high core
+       count systems.
+
+--cpus::
+       Show just entries with activities for the given CPUs.
+
+--color-cpus::
+       Highlight the given cpus.
+
+--color-pids::
+       Highlight the given pids.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index 382ddfb45d1dbbb4a65bfe464c29096923b64bd3..a856a1095893cab0a0d3f7fdff39820090c99422 100644 (file)
@@ -259,9 +259,23 @@ include::itrace.txt[]
 --full-source-path::
        Show the full path for source files for srcline output.
 
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. This is a trade-off
+        between information loss and faster processing especially for
+        workloads that can have a very long callchain stack.
+        Note that when using the --itrace option the synthesized callchain size
+        will override this value if the synthesized callchain size is bigger.
+
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
 --ns::
        Use 9 decimal places when displaying time (i.e. show the nanoseconds)
 
+-f::
+--force::
+       Don't do ownership validation.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
index 19f046f027cd81e42c5696ab3172539baaeb745d..91d638df3a6bb9e6e9e8c10bb7ad3d2fc1e9efd0 100644 (file)
@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
        between information loss and faster processing especially for
        workloads that can have a very long callchain stack.
 
-       Default: 127
+       Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
index 13293de8869fe932c9610f3a3c5612c838a65bc4..6afe20121bc06d671931a3d22d6eeca2ca35c0a6 100644 (file)
@@ -117,9 +117,41 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --syscalls::
        Trace system calls. This option is enabled by default.
 
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+        Setup and enable call-graph (stack chain/backtrace) recording.
+        See `--call-graph` section in perf-record and perf-report
+        man pages for details. The ones that are most useful in 'perf trace'
+        are 'dwarf' and 'lbr', where available. Try: 'perf trace --call-graph dwarf'.
+
+        Using this will, for the root user, bump the value of --mmap-pages to 4
+        times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+        sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+        Show the kernel callchains on the syscall exit path.
+
 --event::
        Trace other events, see 'perf list' for a complete list.
 
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. Note that at this point
+        this is just about the presentation part, i.e. the kernel is still
+        not limiting; the overhead of callchains needs to be set via the
+        knobs in --call-graph dwarf.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--min-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        below the specified depth will be ignored. Disabled by default.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
 --proc-map-timeout::
        When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
        because the file may be huge. A time out is needed in such cases.
index 000ea210389d3aeac32be7e9aaf810faae99b38f..bde8cbae7dd98b732060114e22426494ee0df067 100644 (file)
@@ -183,6 +183,11 @@ endif
 include config/Makefile
 endif
 
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(ARCH)/Makefile
+endif
+
 # The FEATURE_DUMP_EXPORT holds location of the actual
 # FEATURE_DUMP file to be used to bypass feature detection
 # (for bpf or any other subproject)
@@ -297,8 +302,6 @@ endif
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
 
--include arch/$(ARCH)/Makefile
-
 ifneq ($(OUTPUT),)
   CFLAGS += -I$(OUTPUT)
 endif
@@ -390,7 +393,7 @@ endif
 __build-dir = $(subst $(OUTPUT),,$(dir $@))
 build-dir   = $(if $(__build-dir),$(__build-dir),.)
 
-prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
 
 $(OUTPUT)%.o: %.c prepare FORCE
        $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -430,7 +433,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
 
-$(LIBPERF_IN): fixdep FORCE
+$(LIBPERF_IN): prepare fixdep FORCE
        $(Q)$(MAKE) $(build)=libperf
 
 $(LIB_FILE): $(LIBPERF_IN)
@@ -625,7 +628,7 @@ config-clean:
        $(call QUIET_CLEAN, config)
        $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
 
-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
        $(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
        $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
        $(Q)$(RM) $(OUTPUT).config-detected
@@ -662,5 +665,5 @@ FORCE:
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
-.PHONY: libtraceevent_plugins
+.PHONY: libtraceevent_plugins archheaders
 
index 56e05f126ad8793d25bb1a70ce3dac5f100d3b3a..cc3930904d68951db327a4b2dbcc2841c71e106d 100644 (file)
@@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
index 733151cdf46e08397fae0f293c73222cc54ef21e..41bdf9530d821d6b14d7bd1eb007e2ddda06a5a9 100644 (file)
  */
 
 #include <stddef.h>
+#include <errno.h>
+#include <string.h>
 #include <dwarf-regs.h>
-
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include "util.h"
 
 struct pt_regs_dwarfnum {
        const char *name;
        unsigned int dwarfnum;
+       unsigned int ptregs_offset;
 };
 
-#define STR(s) #s
-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
-#define GPR_DWARFNUM_NAME(num) \
-       {.name = STR(%gpr##num), .dwarfnum = num}
-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+#define REG_DWARFNUM_NAME(r, num)                                      \
+               {.name = STR(%)STR(r), .dwarfnum = num,                 \
+               .ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num)                                         \
+               {.name = STR(%gpr##num), .dwarfnum = num,               \
+               .ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
 
 /*
  * Reference:
@@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
        GPR_DWARFNUM_NAME(29),
        GPR_DWARFNUM_NAME(30),
        GPR_DWARFNUM_NAME(31),
-       REG_DWARFNUM_NAME("%msr",   66),
-       REG_DWARFNUM_NAME("%ctr",   109),
-       REG_DWARFNUM_NAME("%link",  108),
-       REG_DWARFNUM_NAME("%xer",   101),
-       REG_DWARFNUM_NAME("%dar",   119),
-       REG_DWARFNUM_NAME("%dsisr", 118),
+       REG_DWARFNUM_NAME(msr,   66),
+       REG_DWARFNUM_NAME(ctr,   109),
+       REG_DWARFNUM_NAME(link,  108),
+       REG_DWARFNUM_NAME(xer,   101),
+       REG_DWARFNUM_NAME(dar,   119),
+       REG_DWARFNUM_NAME(dsisr, 118),
        REG_DWARFNUM_END,
 };
 
@@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n)
                        return roff->name;
        return NULL;
 }
+
+/*
+ * Map a register name as stored in regdwarfnum_table (for example "%gpr31"
+ * or "%msr") to that entry's ptregs_offset.
+ *
+ * Returns the offset of the first entry whose name matches @name exactly,
+ * or -EINVAL when the table has no such entry.
+ */
+int regs_query_register_offset(const char *name)
+{
+       const struct pt_regs_dwarfnum *entry = regdwarfnum_table;
+
+       while (entry->name != NULL) {
+               if (strcmp(entry->name, name) == 0)
+                       return entry->ptregs_offset;
+               entry++;
+       }
+
+       return -EINVAL;
+}
index bbc1a50768dd5de5183f1890a806f710545bf28e..c6d0f91731a14732333af62d0a40a3ea43fb4c99 100644 (file)
@@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
               ehdr.e_type == ET_DYN;
 }
 
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-void arch__elf_sym_adjust(GElf_Sym *sym)
-{
-       sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
-}
-#endif
 #endif
 
 #if !defined(_CALL_ELF) || _CALL_ELF != 2
@@ -65,18 +59,45 @@ bool arch__prefers_symtab(void)
        return true;
 }
 
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+       s->arch_sym = sym->st_other; /* later fed to PPC64_LOCAL_ENTRY_OFFSET() in arch__fix_tev_from_maps() */
+}
+#endif
+
 #define PPC64LE_LEP_OFFSET     8
 
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
-                            struct probe_trace_event *tev, struct map *map)
+                            struct probe_trace_event *tev, struct map *map,
+                            struct symbol *sym)
 {
+       int lep_offset;
+
        /*
-        * ppc64 ABIv2 local entry point is currently always 2 instructions
-        * (8 bytes) after the global entry point.
+        * When probing at a function entry point, we normally always want the
+        * LEP since that catches calls to the function through both the GEP and
+        * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+        * the user only specified the function entry.
+        *
+        * However, if the user specifies an offset, we fall back to using the
+        * GEP since all userspace applications (objdump/readelf) show function
+        * disassembly with offsets from the GEP.
+        *
+        * In addition, we shouldn't specify an offset for kretprobes.
         */
-       if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
-               tev->point.address += PPC64LE_LEP_OFFSET;
+       if (pev->point.offset || pev->point.retprobe || !map || !sym)
+               return;
+
+       lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+       if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
                tev->point.offset += PPC64LE_LEP_OFFSET;
+       else if (lep_offset) {
+               if (pev->uprobes)
+                       tev->point.address += lep_offset;
+               else
+                       tev->point.offset += lep_offset;
        }
 }
 #endif
index 269af21437353b2fb886383ede1e3f9a2f586f25..6c9211b18ec0960c31fa7486cbc55021abbf72d1 100644 (file)
@@ -4,3 +4,26 @@ endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out    := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+       @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
+        || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
+       $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+       $(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
new file mode 100644 (file)
index 0000000..cac6d17
--- /dev/null
@@ -0,0 +1,376 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0      common  read                    sys_read
+1      common  write                   sys_write
+2      common  open                    sys_open
+3      common  close                   sys_close
+4      common  stat                    sys_newstat
+5      common  fstat                   sys_newfstat
+6      common  lstat                   sys_newlstat
+7      common  poll                    sys_poll
+8      common  lseek                   sys_lseek
+9      common  mmap                    sys_mmap
+10     common  mprotect                sys_mprotect
+11     common  munmap                  sys_munmap
+12     common  brk                     sys_brk
+13     64      rt_sigaction            sys_rt_sigaction
+14     common  rt_sigprocmask          sys_rt_sigprocmask
+15     64      rt_sigreturn            sys_rt_sigreturn/ptregs
+16     64      ioctl                   sys_ioctl
+17     common  pread64                 sys_pread64
+18     common  pwrite64                sys_pwrite64
+19     64      readv                   sys_readv
+20     64      writev                  sys_writev
+21     common  access                  sys_access
+22     common  pipe                    sys_pipe
+23     common  select                  sys_select
+24     common  sched_yield             sys_sched_yield
+25     common  mremap                  sys_mremap
+26     common  msync                   sys_msync
+27     common  mincore                 sys_mincore
+28     common  madvise                 sys_madvise
+29     common  shmget                  sys_shmget
+30     common  shmat                   sys_shmat
+31     common  shmctl                  sys_shmctl
+32     common  dup                     sys_dup
+33     common  dup2                    sys_dup2
+34     common  pause                   sys_pause
+35     common  nanosleep               sys_nanosleep
+36     common  getitimer               sys_getitimer
+37     common  alarm                   sys_alarm
+38     common  setitimer               sys_setitimer
+39     common  getpid                  sys_getpid
+40     common  sendfile                sys_sendfile64
+41     common  socket                  sys_socket
+42     common  connect                 sys_connect
+43     common  accept                  sys_accept
+44     common  sendto                  sys_sendto
+45     64      recvfrom                sys_recvfrom
+46     64      sendmsg                 sys_sendmsg
+47     64      recvmsg                 sys_recvmsg
+48     common  shutdown                sys_shutdown
+49     common  bind                    sys_bind
+50     common  listen                  sys_listen
+51     common  getsockname             sys_getsockname
+52     common  getpeername             sys_getpeername
+53     common  socketpair              sys_socketpair
+54     64      setsockopt              sys_setsockopt
+55     64      getsockopt              sys_getsockopt
+56     common  clone                   sys_clone/ptregs
+57     common  fork                    sys_fork/ptregs
+58     common  vfork                   sys_vfork/ptregs
+59     64      execve                  sys_execve/ptregs
+60     common  exit                    sys_exit
+61     common  wait4                   sys_wait4
+62     common  kill                    sys_kill
+63     common  uname                   sys_newuname
+64     common  semget                  sys_semget
+65     common  semop                   sys_semop
+66     common  semctl                  sys_semctl
+67     common  shmdt                   sys_shmdt
+68     common  msgget                  sys_msgget
+69     common  msgsnd                  sys_msgsnd
+70     common  msgrcv                  sys_msgrcv
+71     common  msgctl                  sys_msgctl
+72     common  fcntl                   sys_fcntl
+73     common  flock                   sys_flock
+74     common  fsync                   sys_fsync
+75     common  fdatasync               sys_fdatasync
+76     common  truncate                sys_truncate
+77     common  ftruncate               sys_ftruncate
+78     common  getdents                sys_getdents
+79     common  getcwd                  sys_getcwd
+80     common  chdir                   sys_chdir
+81     common  fchdir                  sys_fchdir
+82     common  rename                  sys_rename
+83     common  mkdir                   sys_mkdir
+84     common  rmdir                   sys_rmdir
+85     common  creat                   sys_creat
+86     common  link                    sys_link
+87     common  unlink                  sys_unlink
+88     common  symlink                 sys_symlink
+89     common  readlink                sys_readlink
+90     common  chmod                   sys_chmod
+91     common  fchmod                  sys_fchmod
+92     common  chown                   sys_chown
+93     common  fchown                  sys_fchown
+94     common  lchown                  sys_lchown
+95     common  umask                   sys_umask
+96     common  gettimeofday            sys_gettimeofday
+97     common  getrlimit               sys_getrlimit
+98     common  getrusage               sys_getrusage
+99     common  sysinfo                 sys_sysinfo
+100    common  times                   sys_times
+101    64      ptrace                  sys_ptrace
+102    common  getuid                  sys_getuid
+103    common  syslog                  sys_syslog
+104    common  getgid                  sys_getgid
+105    common  setuid                  sys_setuid
+106    common  setgid                  sys_setgid
+107    common  geteuid                 sys_geteuid
+108    common  getegid                 sys_getegid
+109    common  setpgid                 sys_setpgid
+110    common  getppid                 sys_getppid
+111    common  getpgrp                 sys_getpgrp
+112    common  setsid                  sys_setsid
+113    common  setreuid                sys_setreuid
+114    common  setregid                sys_setregid
+115    common  getgroups               sys_getgroups
+116    common  setgroups               sys_setgroups
+117    common  setresuid               sys_setresuid
+118    common  getresuid               sys_getresuid
+119    common  setresgid               sys_setresgid
+120    common  getresgid               sys_getresgid
+121    common  getpgid                 sys_getpgid
+122    common  setfsuid                sys_setfsuid
+123    common  setfsgid                sys_setfsgid
+124    common  getsid                  sys_getsid
+125    common  capget                  sys_capget
+126    common  capset                  sys_capset
+127    64      rt_sigpending           sys_rt_sigpending
+128    64      rt_sigtimedwait         sys_rt_sigtimedwait
+129    64      rt_sigqueueinfo         sys_rt_sigqueueinfo
+130    common  rt_sigsuspend           sys_rt_sigsuspend
+131    64      sigaltstack             sys_sigaltstack
+132    common  utime                   sys_utime
+133    common  mknod                   sys_mknod
+134    64      uselib
+135    common  personality             sys_personality
+136    common  ustat                   sys_ustat
+137    common  statfs                  sys_statfs
+138    common  fstatfs                 sys_fstatfs
+139    common  sysfs                   sys_sysfs
+140    common  getpriority             sys_getpriority
+141    common  setpriority             sys_setpriority
+142    common  sched_setparam          sys_sched_setparam
+143    common  sched_getparam          sys_sched_getparam
+144    common  sched_setscheduler      sys_sched_setscheduler
+145    common  sched_getscheduler      sys_sched_getscheduler
+146    common  sched_get_priority_max  sys_sched_get_priority_max
+147    common  sched_get_priority_min  sys_sched_get_priority_min
+148    common  sched_rr_get_interval   sys_sched_rr_get_interval
+149    common  mlock                   sys_mlock
+150    common  munlock                 sys_munlock
+151    common  mlockall                sys_mlockall
+152    common  munlockall              sys_munlockall
+153    common  vhangup                 sys_vhangup
+154    common  modify_ldt              sys_modify_ldt
+155    common  pivot_root              sys_pivot_root
+156    64      _sysctl                 sys_sysctl
+157    common  prctl                   sys_prctl
+158    common  arch_prctl              sys_arch_prctl
+159    common  adjtimex                sys_adjtimex
+160    common  setrlimit               sys_setrlimit
+161    common  chroot                  sys_chroot
+162    common  sync                    sys_sync
+163    common  acct                    sys_acct
+164    common  settimeofday            sys_settimeofday
+165    common  mount                   sys_mount
+166    common  umount2                 sys_umount
+167    common  swapon                  sys_swapon
+168    common  swapoff                 sys_swapoff
+169    common  reboot                  sys_reboot
+170    common  sethostname             sys_sethostname
+171    common  setdomainname           sys_setdomainname
+172    common  iopl                    sys_iopl/ptregs
+173    common  ioperm                  sys_ioperm
+174    64      create_module
+175    common  init_module             sys_init_module
+176    common  delete_module           sys_delete_module
+177    64      get_kernel_syms
+178    64      query_module
+179    common  quotactl                sys_quotactl
+180    64      nfsservctl
+181    common  getpmsg
+182    common  putpmsg
+183    common  afs_syscall
+184    common  tuxcall
+185    common  security
+186    common  gettid                  sys_gettid
+187    common  readahead               sys_readahead
+188    common  setxattr                sys_setxattr
+189    common  lsetxattr               sys_lsetxattr
+190    common  fsetxattr               sys_fsetxattr
+191    common  getxattr                sys_getxattr
+192    common  lgetxattr               sys_lgetxattr
+193    common  fgetxattr               sys_fgetxattr
+194    common  listxattr               sys_listxattr
+195    common  llistxattr              sys_llistxattr
+196    common  flistxattr              sys_flistxattr
+197    common  removexattr             sys_removexattr
+198    common  lremovexattr            sys_lremovexattr
+199    common  fremovexattr            sys_fremovexattr
+200    common  tkill                   sys_tkill
+201    common  time                    sys_time
+202    common  futex                   sys_futex
+203    common  sched_setaffinity       sys_sched_setaffinity
+204    common  sched_getaffinity       sys_sched_getaffinity
+205    64      set_thread_area
+206    64      io_setup                sys_io_setup
+207    common  io_destroy              sys_io_destroy
+208    common  io_getevents            sys_io_getevents
+209    64      io_submit               sys_io_submit
+210    common  io_cancel               sys_io_cancel
+211    64      get_thread_area
+212    common  lookup_dcookie          sys_lookup_dcookie
+213    common  epoll_create            sys_epoll_create
+214    64      epoll_ctl_old
+215    64      epoll_wait_old
+216    common  remap_file_pages        sys_remap_file_pages
+217    common  getdents64              sys_getdents64
+218    common  set_tid_address         sys_set_tid_address
+219    common  restart_syscall         sys_restart_syscall
+220    common  semtimedop              sys_semtimedop
+221    common  fadvise64               sys_fadvise64
+222    64      timer_create            sys_timer_create
+223    common  timer_settime           sys_timer_settime
+224    common  timer_gettime           sys_timer_gettime
+225    common  timer_getoverrun        sys_timer_getoverrun
+226    common  timer_delete            sys_timer_delete
+227    common  clock_settime           sys_clock_settime
+228    common  clock_gettime           sys_clock_gettime
+229    common  clock_getres            sys_clock_getres
+230    common  clock_nanosleep         sys_clock_nanosleep
+231    common  exit_group              sys_exit_group
+232    common  epoll_wait              sys_epoll_wait
+233    common  epoll_ctl               sys_epoll_ctl
+234    common  tgkill                  sys_tgkill
+235    common  utimes                  sys_utimes
+236    64      vserver
+237    common  mbind                   sys_mbind
+238    common  set_mempolicy           sys_set_mempolicy
+239    common  get_mempolicy           sys_get_mempolicy
+240    common  mq_open                 sys_mq_open
+241    common  mq_unlink               sys_mq_unlink
+242    common  mq_timedsend            sys_mq_timedsend
+243    common  mq_timedreceive         sys_mq_timedreceive
+244    64      mq_notify               sys_mq_notify
+245    common  mq_getsetattr           sys_mq_getsetattr
+246    64      kexec_load              sys_kexec_load
+247    64      waitid                  sys_waitid
+248    common  add_key                 sys_add_key
+249    common  request_key             sys_request_key
+250    common  keyctl                  sys_keyctl
+251    common  ioprio_set              sys_ioprio_set
+252    common  ioprio_get              sys_ioprio_get
+253    common  inotify_init            sys_inotify_init
+254    common  inotify_add_watch       sys_inotify_add_watch
+255    common  inotify_rm_watch        sys_inotify_rm_watch
+256    common  migrate_pages           sys_migrate_pages
+257    common  openat                  sys_openat
+258    common  mkdirat                 sys_mkdirat
+259    common  mknodat                 sys_mknodat
+260    common  fchownat                sys_fchownat
+261    common  futimesat               sys_futimesat
+262    common  newfstatat              sys_newfstatat
+263    common  unlinkat                sys_unlinkat
+264    common  renameat                sys_renameat
+265    common  linkat                  sys_linkat
+266    common  symlinkat               sys_symlinkat
+267    common  readlinkat              sys_readlinkat
+268    common  fchmodat                sys_fchmodat
+269    common  faccessat               sys_faccessat
+270    common  pselect6                sys_pselect6
+271    common  ppoll                   sys_ppoll
+272    common  unshare                 sys_unshare
+273    64      set_robust_list         sys_set_robust_list
+274    64      get_robust_list         sys_get_robust_list
+275    common  splice                  sys_splice
+276    common  tee                     sys_tee
+277    common  sync_file_range         sys_sync_file_range
+278    64      vmsplice                sys_vmsplice
+279    64      move_pages              sys_move_pages
+280    common  utimensat               sys_utimensat
+281    common  epoll_pwait             sys_epoll_pwait
+282    common  signalfd                sys_signalfd
+283    common  timerfd_create          sys_timerfd_create
+284    common  eventfd                 sys_eventfd
+285    common  fallocate               sys_fallocate
+286    common  timerfd_settime         sys_timerfd_settime
+287    common  timerfd_gettime         sys_timerfd_gettime
+288    common  accept4                 sys_accept4
+289    common  signalfd4               sys_signalfd4
+290    common  eventfd2                sys_eventfd2
+291    common  epoll_create1           sys_epoll_create1
+292    common  dup3                    sys_dup3
+293    common  pipe2                   sys_pipe2
+294    common  inotify_init1           sys_inotify_init1
+295    64      preadv                  sys_preadv
+296    64      pwritev                 sys_pwritev
+297    64      rt_tgsigqueueinfo       sys_rt_tgsigqueueinfo
+298    common  perf_event_open         sys_perf_event_open
+299    64      recvmmsg                sys_recvmmsg
+300    common  fanotify_init           sys_fanotify_init
+301    common  fanotify_mark           sys_fanotify_mark
+302    common  prlimit64               sys_prlimit64
+303    common  name_to_handle_at       sys_name_to_handle_at
+304    common  open_by_handle_at       sys_open_by_handle_at
+305    common  clock_adjtime           sys_clock_adjtime
+306    common  syncfs                  sys_syncfs
+307    64      sendmmsg                sys_sendmmsg
+308    common  setns                   sys_setns
+309    common  getcpu                  sys_getcpu
+310    64      process_vm_readv        sys_process_vm_readv
+311    64      process_vm_writev       sys_process_vm_writev
+312    common  kcmp                    sys_kcmp
+313    common  finit_module            sys_finit_module
+314    common  sched_setattr           sys_sched_setattr
+315    common  sched_getattr           sys_sched_getattr
+316    common  renameat2               sys_renameat2
+317    common  seccomp                 sys_seccomp
+318    common  getrandom               sys_getrandom
+319    common  memfd_create            sys_memfd_create
+320    common  kexec_file_load         sys_kexec_file_load
+321    common  bpf                     sys_bpf
+322    64      execveat                sys_execveat/ptregs
+323    common  userfaultfd             sys_userfaultfd
+324    common  membarrier              sys_membarrier
+325    common  mlock2                  sys_mlock2
+326    common  copy_file_range         sys_copy_file_range
+327    64      preadv2                 sys_preadv2
+328    64      pwritev2                sys_pwritev2
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation.
+#
+512    x32     rt_sigaction            compat_sys_rt_sigaction
+513    x32     rt_sigreturn            sys32_x32_rt_sigreturn
+514    x32     ioctl                   compat_sys_ioctl
+515    x32     readv                   compat_sys_readv
+516    x32     writev                  compat_sys_writev
+517    x32     recvfrom                compat_sys_recvfrom
+518    x32     sendmsg                 compat_sys_sendmsg
+519    x32     recvmsg                 compat_sys_recvmsg
+520    x32     execve                  compat_sys_execve/ptregs
+521    x32     ptrace                  compat_sys_ptrace
+522    x32     rt_sigpending           compat_sys_rt_sigpending
+523    x32     rt_sigtimedwait         compat_sys_rt_sigtimedwait
+524    x32     rt_sigqueueinfo         compat_sys_rt_sigqueueinfo
+525    x32     sigaltstack             compat_sys_sigaltstack
+526    x32     timer_create            compat_sys_timer_create
+527    x32     mq_notify               compat_sys_mq_notify
+528    x32     kexec_load              compat_sys_kexec_load
+529    x32     waitid                  compat_sys_waitid
+530    x32     set_robust_list         compat_sys_set_robust_list
+531    x32     get_robust_list         compat_sys_get_robust_list
+532    x32     vmsplice                compat_sys_vmsplice
+533    x32     move_pages              compat_sys_move_pages
+534    x32     preadv                  compat_sys_preadv64
+535    x32     pwritev                 compat_sys_pwritev64
+536    x32     rt_tgsigqueueinfo       compat_sys_rt_tgsigqueueinfo
+537    x32     recvmmsg                compat_sys_recvmmsg
+538    x32     sendmmsg                compat_sys_sendmmsg
+539    x32     process_vm_readv        compat_sys_process_vm_readv
+540    x32     process_vm_writev       compat_sys_process_vm_writev
+541    x32     setsockopt              compat_sys_setsockopt
+542    x32     getsockopt              compat_sys_getsockopt
+543    x32     io_setup                compat_sys_io_setup
+544    x32     io_submit               compat_sys_io_submit
+545    x32     execveat                compat_sys_execveat/ptregs
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
new file mode 100755 (executable)
index 0000000..49a18b9
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+in="$1"
+arch="$2"
+
+syscall_macro() {
+    nr="$1"
+    name="$2"
+
+    echo "     [$nr] = \"$name\","
+}
+
+emit() {
+    nr="$1"
+    entry="$2"
+
+    syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > $sorted_table
+
+max_nr=0
+while read nr abi name entry compat; do
+    if [ $nr -ge 512 ] ; then # discard compat sycalls
+        break
+    fi
+
+    emit "$nr" "$name"
+    max_nr=$nr
+done < $sorted_table
+
+rm -f $sorted_table
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
index 9d29ee283ac5334bfd6a529c7a9d226f0db782cd..d4aa567a29c4685ece1fb142577322ba9e57c890 100644 (file)
@@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)
 
        CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-       perf_evlist__config(evlist, &opts);
+       perf_evlist__config(evlist, &opts, NULL);
 
        evsel = perf_evlist__first(evlist);
 
index 9223c164e545d869267b9b7a17d409b774dd7904..1f86ee8fb831c99e8d22ead64ca33ec11a816985 100644 (file)
@@ -63,6 +63,8 @@ struct pt_regs_offset {
 # define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
 #endif
 
+/* TODO: switching by dwarf address size */
+#ifndef __x86_64__
 static const struct pt_regs_offset x86_32_regoffset_table[] = {
        REG_OFFSET_NAME_32("%ax",       eax),
        REG_OFFSET_NAME_32("%cx",       ecx),
@@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = {
        REG_OFFSET_END,
 };
 
+#define regoffset_table x86_32_regoffset_table
+#else
 static const struct pt_regs_offset x86_64_regoffset_table[] = {
        REG_OFFSET_NAME_64("%ax",       rax),
        REG_OFFSET_NAME_64("%dx",       rdx),
@@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = {
        REG_OFFSET_END,
 };
 
-/* TODO: switching by dwarf address size */
-#ifdef __x86_64__
 #define regoffset_table x86_64_regoffset_table
-#else
-#define regoffset_table x86_32_regoffset_table
 #endif
 
 /* Minus 1 for the ending REG_OFFSET_END */
index d66f9ad4df2ea5da6eca22cae351a842d1143508..7dc30637cf66f4957dd6608a9b3a145a0e81a487 100644 (file)
@@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
        if (!intel_bts_pmu)
                return NULL;
 
+       if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+               *err = -errno;
+               return NULL;
+       }
+
        btsr = zalloc(sizeof(struct intel_bts_recording));
        if (!btsr) {
                *err = -ENOMEM;
index a3395179c9eebd5fcd39aa4796a0dc3d495f4b5d..a07b9605e93b3f1c7c746f5296229c20f8f3767a 100644 (file)
@@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
        if (!intel_pt_pmu)
                return NULL;
 
+       if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+               *err = -errno;
+               return NULL;
+       }
+
        ptr = zalloc(sizeof(struct intel_pt_recording));
        if (!ptr) {
                *err = -ENOMEM;
index fd2868490d00ea895b380e5cd408482c2f990a35..357f1b13b5ae3e585aa609e303e08bd6f344d546 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/types.h>
 #include "../../util/debug.h"
 #include "../../util/tsc.h"
-#include "tsc.h"
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
                             struct perf_tsc_conversion *tc)
@@ -46,3 +45,34 @@ u64 rdtsc(void)
 
        return low | ((u64)high) << 32;
 }
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+                               struct perf_tool *tool,
+                               perf_event__handler_t process,
+                               struct machine *machine)
+{
+       union perf_event event = {
+               .time_conv = {
+                       .header = {
+                               .type = PERF_RECORD_TIME_CONV,
+                               .size = sizeof(struct time_conv_event),
+                       },
+               },
+       };
+       struct perf_tsc_conversion tc;
+       int err;
+
+       err = perf_read_tsc_conversion(pc, &tc);
+       if (err == -EOPNOTSUPP)
+               return 0;
+       if (err)
+               return err;
+
+       pr_debug2("Synthesizing TSC conversion information\n");
+
+       event.time_conv.time_mult  = tc.time_mult;
+       event.time_conv.time_shift = tc.time_shift;
+       event.time_conv.time_zero  = tc.time_zero;
+
+       return process(tool, &event, NULL, machine);
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
deleted file mode 100644 (file)
index 2edc4d3..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-
-#include <linux/types.h>
-
-struct perf_tsc_conversion {
-       u16 time_shift;
-       u32 time_mult;
-       u64 time_zero;
-};
-
-struct perf_event_mmap_page;
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
-                            struct perf_tsc_conversion *tc);
-
-#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
index 6a18ce21f8659baba96dd5c36f3460919906caba..6952db65508abced08317b18e8262cc72881b32b 100644 (file)
@@ -83,7 +83,7 @@ static void *workerfn(void *arg)
        do {
                int ret;
        again:
-               ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+               ret = futex_lock_pi(w->futex, NULL, futex_flag);
 
                if (ret) { /* handle lock acquisition */
                        if (!silent)
index d44de9f44281b11cbd7fa9740424b8b33acb4e93..b2e06d1190d0766694c97f7f0b808717442af604 100644 (file)
@@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
 
 /**
  * futex_lock_pi() - block on uaddr as a PI mutex
- * @detect:    whether (1) or not (0) to perform deadlock detection
  */
 static inline int
-futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
-             int opflags)
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
 {
-       return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+       return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
 }
 
 /**
index a91aa85d80ffc250241d84178da82df658be8f36..2b54d0f2672a39eaee68c5b5a48de2c01ba2956f 100644 (file)
@@ -6,6 +6,7 @@
  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
 
+#include "debug.h"
 #include "../perf.h"
 #include "../util/util.h"
 #include <subcmd/parse-options.h>
@@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = {
        .config         = PERF_COUNT_HW_CPU_CYCLES
 };
 
-static void init_cycles(void)
+static int init_cycles(void)
 {
        cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 
-       if (cycles_fd < 0 && errno == ENOSYS)
-               die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-       else
-               BUG_ON(cycles_fd < 0);
+       if (cycles_fd < 0 && errno == ENOSYS) {
+               pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+               return -1;
+       }
+
+       return cycles_fd;
 }
 
 static u64 get_cycles(void)
@@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 
        argc = parse_options(argc, argv, options, info->usage, 0);
 
-       if (use_cycles)
-               init_cycles();
+       if (use_cycles) {
+               i = init_cycles();
+               if (i < 0) {
+                       fprintf(stderr, "Failed to open cycles counter\n");
+                       return i;
+               }
+       }
 
        size = (size_t)perf_atoll((char *)size_str);
        size_total = (double)size * nr_loops;
index c42448ed5dfe20a74af9c671669aa23a25cadb5b..fe1b77fa21f91c409666f2fd9ede2f2df82a1d76 100644 (file)
@@ -12,6 +12,7 @@
 #include <subcmd/parse-options.h>
 #include "util/util.h"
 #include "util/debug.h"
+#include "util/config.h"
 
 static bool use_system_config, use_user_config;
 
@@ -32,13 +33,28 @@ static struct option config_options[] = {
        OPT_END()
 };
 
-static int show_config(const char *key, const char *value,
-                      void *cb __maybe_unused)
+static int show_config(struct perf_config_set *set)
 {
-       if (value)
-               printf("%s=%s\n", key, value);
-       else
-               printf("%s\n", key);
+       struct perf_config_section *section;
+       struct perf_config_item *item;
+       struct list_head *sections;
+
+       if (set == NULL)
+               return -1;
+
+       sections = &set->sections;
+       if (list_empty(sections))
+               return -1;
+
+       list_for_each_entry(section, sections, node) {
+               list_for_each_entry(item, &section->items, node) {
+                       char *value = item->value;
+
+                       if (value)
+                               printf("%s.%s=%s\n", section->name,
+                                      item->name, value);
+               }
+       }
 
        return 0;
 }
@@ -46,6 +62,7 @@ static int show_config(const char *key, const char *value,
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        int ret = 0;
+       struct perf_config_set *set;
        char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
        argc = parse_options(argc, argv, config_options, config_usage,
@@ -63,13 +80,19 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
        else if (use_user_config)
                config_exclusive_filename = user_config;
 
+       set = perf_config_set__new();
+       if (!set) {
+               ret = -1;
+               goto out_err;
+       }
+
        switch (actions) {
        case ACTION_LIST:
                if (argc) {
                        pr_err("Error: takes no arguments\n");
                        parse_options_usage(config_usage, config_options, "l", 1);
                } else {
-                       ret = perf_config(show_config, NULL);
+                       ret = show_config(set);
                        if (ret < 0) {
                                const char * config_filename = config_exclusive_filename;
                                if (!config_exclusive_filename)
@@ -83,5 +106,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
                usage_with_options(config_usage, config_options);
        }
 
+       perf_config_set__delete(set);
+out_err:
        return ret;
 }
index 8053a8ceefdad28d3008de359190f3f043929267..9ce354f469dce9e96d078ae5c352732cec59178c 100644 (file)
@@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists)
        struct rb_root *root;
        struct rb_node *next;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
@@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists)
        struct rb_root *root;
        struct rb_node *next;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
index d1a2d104f2bc19153159f83c3bf4941896c5ccad..e5afa8fe1bf1125ab577ea090befa42dbe64760c 100644 (file)
@@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                        .auxtrace_info  = perf_event__repipe_op2_synth,
                        .auxtrace       = perf_event__repipe_auxtrace,
                        .auxtrace_error = perf_event__repipe_op2_synth,
+                       .time_conv      = perf_event__repipe_op2_synth,
                        .finished_round = perf_event__repipe_oe_synth,
                        .build_id       = perf_event__repipe_op2_synth,
                        .id_index       = perf_event__repipe_op2_synth,
index c9cb3be47cff4801d105d8c5091768a4646ccdc5..58adfee230de8c2c2d36b5692b1cc668c1af5f01 100644 (file)
@@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
        }
 
        al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
-       sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+       sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 
        callchain_cursor_commit(&callchain_cursor);
        while (true) {
index bff666458b28e24dccac682d0f28b6708a1a7c83..6487c06d270853fdf3c0e3a101f821d5692871a6 100644 (file)
@@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
        struct perf_evlist *evlist = kvm->evlist;
        char sbuf[STRERR_BUFSIZE];
 
-       perf_evlist__config(evlist, &kvm->opts);
+       perf_evlist__config(evlist, &kvm->opts, NULL);
 
        /*
         * Note: exclude_{guest,host} do not apply here.
index 85db3be4b3cb6365059adb3fea17dcf3e6700f77..1dc140c5481d61a1639de2fb5bcb2e1021b0dfc5 100644 (file)
@@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        int rec_argc, i = 0, j;
        const char **rec_argv;
        int ret;
+       bool all_user = false, all_kernel = false;
        struct option options[] = {
        OPT_CALLBACK('e', "event", &mem, "event",
                     "event selector. use 'perf mem record -e list' to list available events",
                     parse_record_events),
        OPT_INCR('v', "verbose", &verbose,
                 "be more verbose (show counter open errors, etc)"),
+       OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
+       OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
        OPT_END()
        };
 
        argc = parse_options(argc, argv, options, record_mem_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
 
-       rec_argc = argc + 7; /* max number of arguments */
+       rec_argc = argc + 9; /* max number of arguments */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (!rec_argv)
                return -1;
@@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
                rec_argv[i++] = perf_mem_events__name(j);
        };
 
+       if (all_user)
+               rec_argv[i++] = "--all-user";
+
+       if (all_kernel)
+               rec_argv[i++] = "--all-kernel";
+
        for (j = 0; j < argc; j++, i++)
                rec_argv[i] = argv[j];
 
index 515510ecc76a43391e2ac58f830557b51810b466..f3679c44d3f3d4b7c51bbb627375a315099b974d 100644 (file)
 #include "util/data.h"
 #include "util/perf_regs.h"
 #include "util/auxtrace.h"
+#include "util/tsc.h"
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
 #include "util/bpf-loader.h"
+#include "util/trigger.h"
 #include "asm/bug.h"
 
 #include <unistd.h>
@@ -55,6 +57,8 @@ struct record {
        bool                    no_buildid_cache;
        bool                    no_buildid_cache_set;
        bool                    buildid_all;
+       bool                    timestamp_filename;
+       bool                    switch_output;
        unsigned long long      samples;
 };
 
@@ -124,9 +128,10 @@ out:
 static volatile int done;
 static volatile int signr = -1;
 static volatile int child_finished;
-static volatile int auxtrace_snapshot_enabled;
-static volatile int auxtrace_snapshot_err;
+
 static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
 
 static void sig_handler(int sig)
 {
@@ -244,11 +249,12 @@ static void record__read_auxtrace_snapshot(struct record *rec)
 {
        pr_debug("Recording AUX area tracing snapshot\n");
        if (record__auxtrace_read_snapshot_all(rec) < 0) {
-               auxtrace_snapshot_err = -1;
+               trigger_error(&auxtrace_snapshot_trigger);
        } else {
-               auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
-               if (!auxtrace_snapshot_err)
-                       auxtrace_snapshot_enabled = 1;
+               if (auxtrace_record__snapshot_finish(rec->itr))
+                       trigger_error(&auxtrace_snapshot_trigger);
+               else
+                       trigger_ready(&auxtrace_snapshot_trigger);
        }
 }
 
@@ -283,7 +289,7 @@ static int record__open(struct record *rec)
        struct record_opts *opts = &rec->opts;
        int rc = 0;
 
-       perf_evlist__config(evlist, opts);
+       perf_evlist__config(evlist, opts, &callchain_param);
 
        evlist__for_each(evlist, pos) {
 try_again:
@@ -494,6 +500,73 @@ record__finish_output(struct record *rec)
        return;
 }
 
+static int record__synthesize_workload(struct record *rec)
+{
+       struct {
+               struct thread_map map;
+               struct thread_map_data map_data;
+       } thread_map;
+
+       thread_map.map.nr = 1;
+       thread_map.map.map[0].pid = rec->evlist->workload.pid;
+       thread_map.map.map[0].comm = NULL;
+       return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
+                                                process_synthesized_event,
+                                                &rec->session->machines.host,
+                                                rec->opts.sample_address,
+                                                rec->opts.proc_map_timeout);
+}
+
+static int record__synthesize(struct record *rec);
+
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+       struct perf_data_file *file = &rec->file;
+       int fd, err;
+
+       /* Same Size:      "2015122520103046"*/
+       char timestamp[] = "InvalidTimestamp";
+
+       rec->samples = 0;
+       record__finish_output(rec);
+       err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+       if (err) {
+               pr_err("Failed to get current timestamp\n");
+               return -EINVAL;
+       }
+
+       fd = perf_data_file__switch(file, timestamp,
+                                   rec->session->header.data_offset,
+                                   at_exit);
+       if (fd >= 0 && !at_exit) {
+               rec->bytes_written = 0;
+               rec->session->header.data_size = 0;
+       }
+
+       if (!quiet)
+               fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+                       file->path, timestamp);
+
+       /* Output tracking events */
+       if (!at_exit) {
+               record__synthesize(rec);
+
+               /*
+                * In 'perf record --switch-output' without -a,
+                * record__synthesize() in record__switch_output() won't
+                * generate tracking events because there's no thread_map
+                * in evlist, so the newly created perf.data would not
+                * contain map and comm information.
+                * Create a fake thread_map and directly call
+                * perf_event__synthesize_thread_map() for those events.
+                */
+               if (target__none(&rec->opts.target))
+                       record__synthesize_workload(rec);
+       }
+       return fd;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -512,6 +585,15 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
 static void snapshot_sig_handler(int sig);
 
+int __weak
+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+                           struct perf_tool *tool __maybe_unused,
+                           perf_event__handler_t process __maybe_unused,
+                           struct machine *machine __maybe_unused)
+{
+       return 0;
+}
+
 static int record__synthesize(struct record *rec)
 {
        struct perf_session *session = rec->session;
@@ -549,6 +631,11 @@ static int record__synthesize(struct record *rec)
                }
        }
 
+       err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+                                         process_synthesized_event, machine);
+       if (err)
+               goto out;
+
        if (rec->opts.full_auxtrace) {
                err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
                                        session, process_synthesized_event);
@@ -600,10 +687,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
        signal(SIGTERM, sig_handler);
-       if (rec->opts.auxtrace_snapshot_mode)
+
+       if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
                signal(SIGUSR2, snapshot_sig_handler);
-       else
+               if (rec->opts.auxtrace_snapshot_mode)
+                       trigger_on(&auxtrace_snapshot_trigger);
+               if (rec->switch_output)
+                       trigger_on(&switch_output_trigger);
+       } else {
                signal(SIGUSR2, SIG_IGN);
+       }
 
        session = perf_session__new(file, false, tool);
        if (session == NULL) {
@@ -729,27 +822,45 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                perf_evlist__enable(rec->evlist);
        }
 
-       auxtrace_snapshot_enabled = 1;
+       trigger_ready(&auxtrace_snapshot_trigger);
+       trigger_ready(&switch_output_trigger);
        for (;;) {
                unsigned long long hits = rec->samples;
 
                if (record__mmap_read_all(rec) < 0) {
-                       auxtrace_snapshot_enabled = 0;
+                       trigger_error(&auxtrace_snapshot_trigger);
+                       trigger_error(&switch_output_trigger);
                        err = -1;
                        goto out_child;
                }
 
                if (auxtrace_record__snapshot_started) {
                        auxtrace_record__snapshot_started = 0;
-                       if (!auxtrace_snapshot_err)
+                       if (!trigger_is_error(&auxtrace_snapshot_trigger))
                                record__read_auxtrace_snapshot(rec);
-                       if (auxtrace_snapshot_err) {
+                       if (trigger_is_error(&auxtrace_snapshot_trigger)) {
                                pr_err("AUX area tracing snapshot failed\n");
                                err = -1;
                                goto out_child;
                        }
                }
 
+               if (trigger_is_hit(&switch_output_trigger)) {
+                       trigger_ready(&switch_output_trigger);
+
+                       if (!quiet)
+                               fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
+                                       waking);
+                       waking = 0;
+                       fd = record__switch_output(rec, false);
+                       if (fd < 0) {
+                               pr_err("Failed to switch to new file\n");
+                               trigger_error(&switch_output_trigger);
+                               err = fd;
+                               goto out_child;
+                       }
+               }
+
                if (hits == rec->samples) {
                        if (done || draining)
                                break;
@@ -772,12 +883,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                 * disable events in this case.
                 */
                if (done && !disabled && !target__none(&opts->target)) {
-                       auxtrace_snapshot_enabled = 0;
+                       trigger_off(&auxtrace_snapshot_trigger);
                        perf_evlist__disable(rec->evlist);
                        disabled = true;
                }
        }
-       auxtrace_snapshot_enabled = 0;
+       trigger_off(&auxtrace_snapshot_trigger);
+       trigger_off(&switch_output_trigger);
 
        if (forks && workload_exec_errno) {
                char msg[STRERR_BUFSIZE];
@@ -811,11 +923,22 @@ out_child:
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;
 
-       if (!err)
-               record__finish_output(rec);
+       if (!err) {
+               if (!rec->timestamp_filename) {
+                       record__finish_output(rec);
+               } else {
+                       fd = record__switch_output(rec, true);
+                       if (fd < 0) {
+                               status = fd;
+                               goto out_delete_session;
+                       }
+               }
+       }
 
        if (!err && !quiet) {
                char samples[128];
+               const char *postfix = rec->timestamp_filename ?
+                                       ".<timestamp>" : "";
 
                if (rec->samples && !rec->opts.full_auxtrace)
                        scnprintf(samples, sizeof(samples),
@@ -823,9 +946,9 @@ out_child:
                else
                        samples[0] = '\0';
 
-               fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
+               fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
                        perf_data_file__size(file) / 1024.0 / 1024.0,
-                       file->path, samples);
+                       file->path, postfix, samples);
        }
 
 out_delete_session:
@@ -833,58 +956,61 @@ out_delete_session:
        return status;
 }
 
-static void callchain_debug(void)
+static void callchain_debug(struct callchain_param *callchain)
 {
        static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 
-       pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
+       pr_debug("callchain: type %s\n", str[callchain->record_mode]);
 
-       if (callchain_param.record_mode == CALLCHAIN_DWARF)
+       if (callchain->record_mode == CALLCHAIN_DWARF)
                pr_debug("callchain: stack dump size %d\n",
-                        callchain_param.dump_size);
+                        callchain->dump_size);
 }
 
-int record_parse_callchain_opt(const struct option *opt,
-                              const char *arg,
-                              int unset)
+int record_opts__parse_callchain(struct record_opts *record,
+                                struct callchain_param *callchain,
+                                const char *arg, bool unset)
 {
        int ret;
-       struct record_opts *record = (struct record_opts *)opt->value;
-
-       record->callgraph_set = true;
-       callchain_param.enabled = !unset;
+       callchain->enabled = !unset;
 
        /* --no-call-graph */
        if (unset) {
-               callchain_param.record_mode = CALLCHAIN_NONE;
+               callchain->record_mode = CALLCHAIN_NONE;
                pr_debug("callchain: disabled\n");
                return 0;
        }
 
-       ret = parse_callchain_record_opt(arg, &callchain_param);
+       ret = parse_callchain_record_opt(arg, callchain);
        if (!ret) {
                /* Enable data address sampling for DWARF unwind. */
-               if (callchain_param.record_mode == CALLCHAIN_DWARF)
+               if (callchain->record_mode == CALLCHAIN_DWARF)
                        record->sample_address = true;
-               callchain_debug();
+               callchain_debug(callchain);
        }
 
        return ret;
 }
 
+int record_parse_callchain_opt(const struct option *opt,
+                              const char *arg,
+                              int unset)
+{
+       return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
 int record_callchain_opt(const struct option *opt,
                         const char *arg __maybe_unused,
                         int unset __maybe_unused)
 {
-       struct record_opts *record = (struct record_opts *)opt->value;
+       struct callchain_param *callchain = opt->value;
 
-       record->callgraph_set = true;
-       callchain_param.enabled = true;
+       callchain->enabled = true;
 
-       if (callchain_param.record_mode == CALLCHAIN_NONE)
-               callchain_param.record_mode = CALLCHAIN_FP;
+       if (callchain->record_mode == CALLCHAIN_NONE)
+               callchain->record_mode = CALLCHAIN_FP;
 
-       callchain_debug();
+       callchain_debug(callchain);
        return 0;
 }
 
@@ -1122,7 +1248,7 @@ struct option __record_options[] = {
                     record__parse_mmap_pages),
        OPT_BOOLEAN(0, "group", &record.opts.group,
                    "put the counters into a counter group"),
-       OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
+       OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording" ,
                           &record_callchain_opt),
        OPT_CALLBACK(0, "call-graph", &record.opts,
@@ -1195,6 +1321,10 @@ struct option __record_options[] = {
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
                    "Record build-id of all DSOs regardless of hits"),
+       OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
+                   "append timestamp to output filename"),
+       OPT_BOOLEAN(0, "switch-output", &record.switch_output,
+                   "Switch output when receive SIGUSR2"),
        OPT_END()
 };
 
@@ -1250,6 +1380,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
                return -EINVAL;
        }
 
+       if (rec->switch_output)
+               rec->timestamp_filename = true;
+
        if (!rec->itr) {
                rec->itr = auxtrace_record__init(rec->evlist, &err);
                if (err)
@@ -1261,6 +1394,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
        if (err)
                return err;
 
+       err = bpf__setup_stdout(rec->evlist);
+       if (err) {
+               bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+               pr_err("ERROR: Setup BPF stdout failed: %s\n",
+                        errbuf);
+               return err;
+       }
+
        err = -ENOMEM;
 
        symbol__init(NULL);
@@ -1275,8 +1416,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
 "even with a suitable vmlinux or kallsyms file.\n\n");
 
-       if (rec->no_buildid_cache || rec->no_buildid)
+       if (rec->no_buildid_cache || rec->no_buildid) {
                disable_buildid_cache();
+       } else if (rec->switch_output) {
+               /*
+                * In 'perf record --switch-output', disable buildid
+                * generation by default to reduce data file switching
+                * overhead. Still generate buildid if they are required
+                * explicitly using
+                *
+                *  perf record --switch-output --no-no-buildid \
+                *              --no-no-buildid-cache
+                *
+                * The following code is equivalent to:
+                *
+                * if ((rec->no_buildid || !rec->no_buildid_set) &&
+                *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
+                *         disable_buildid_cache();
+                */
+               bool disable = true;
+
+               if (rec->no_buildid_set && !rec->no_buildid)
+                       disable = false;
+               if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
+                       disable = false;
+               if (disable) {
+                       rec->no_buildid = true;
+                       rec->no_buildid_cache = true;
+                       disable_buildid_cache();
+               }
+       }
 
        if (rec->evlist->nr_entries == 0 &&
            perf_evlist__add_default(rec->evlist) < 0) {
@@ -1335,9 +1504,13 @@ out_symbol_exit:
 
 static void snapshot_sig_handler(int sig __maybe_unused)
 {
-       if (!auxtrace_snapshot_enabled)
-               return;
-       auxtrace_snapshot_enabled = 0;
-       auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
-       auxtrace_record__snapshot_started = 1;
+       if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+               trigger_hit(&auxtrace_snapshot_trigger);
+               auxtrace_record__snapshot_started = 1;
+               if (auxtrace_record__snapshot_start(record.itr))
+                       trigger_error(&auxtrace_snapshot_trigger);
+       }
+
+       if (trigger_is_ready(&switch_output_trigger))
+               trigger_hit(&switch_output_trigger);
 }
index 160ea23b45aaf5450f85b402ef38b27e06b63003..87d40e3c4078ee99740e4563ebff885792ec9aff 100644 (file)
@@ -47,7 +47,6 @@ struct report {
        struct perf_tool        tool;
        struct perf_session     *session;
        bool                    use_tui, use_gtk, use_stdio;
-       bool                    dont_use_callchains;
        bool                    show_full_info;
        bool                    show_threads;
        bool                    inverted_callchain;
@@ -235,7 +234,7 @@ static int report__setup_sample_type(struct report *rep)
                sample_type |= PERF_SAMPLE_BRANCH_STACK;
 
        if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-               if (sort__has_parent) {
+               if (perf_hpp_list.parent) {
                        ui__error("Selected --sort parent, but no "
                                    "callchain data. Did you call "
                                    "'perf record' without -g?\n");
@@ -247,7 +246,7 @@ static int report__setup_sample_type(struct report *rep)
                                  "you call 'perf record' without -g?\n");
                        return -1;
                }
-       } else if (!rep->dont_use_callchains &&
+       } else if (!callchain_param.enabled &&
                   callchain_param.mode != CHAIN_NONE &&
                   !symbol_conf.use_callchain) {
                        symbol_conf.use_callchain = true;
@@ -599,13 +598,15 @@ static int __cmd_report(struct report *rep)
 static int
 report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-       struct report *rep = (struct report *)opt->value;
+       struct callchain_param *callchain = opt->value;
 
+       callchain->enabled = !unset;
        /*
         * --no-call-graph
         */
        if (unset) {
-               rep->dont_use_callchains = true;
+               symbol_conf.use_callchain = false;
+               callchain->mode = CHAIN_NONE;
                return 0;
        }
 
@@ -690,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                        .ordered_events  = true,
                        .ordering_requires_timestamps = true,
                },
-               .max_stack               = PERF_MAX_STACK_DEPTH,
+               .max_stack               = sysctl_perf_event_max_stack,
                .pretty_printing_style   = "normal",
                .socket_filter           = -1,
        };
@@ -734,7 +735,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                   "regex filter to identify parent, see: '--sort parent'"),
        OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
                    "Only display entries with parent-match"),
-       OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
+       OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
                             "print_type,threshold[,print_limit],order,sort_key[,branch],value",
                             report_callchain_help, &report_parse_callchain_opt,
                             callchain_default_opt),
@@ -743,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_INTEGER(0, "max-stack", &report.max_stack,
                    "Set the maximum stack depth when parsing the callchain, "
                    "anything beyond the specified depth will be ignored. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
                    "alias for inverted call graph"),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
@@ -935,7 +936,7 @@ repeat:
                        goto error;
                }
 
-               sort__need_collapse = true;
+               perf_hpp_list.need_collapse = true;
        }
 
        /* Force tty output for header output and per-thread stat. */
index 871b55ae22a4170d285c119cc5f5dddfc90a60e1..afa057666c2adf68ec1279dc29a027744ec38fff 100644 (file)
@@ -11,6 +11,8 @@
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
 
 #include <subcmd/parse-options.h>
 #include "util/trace-event.h"
@@ -122,6 +124,21 @@ struct trace_sched_handler {
                                  struct machine *machine);
 };
 
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
+struct perf_sched_map {  /* state for the 'perf sched map' display options */
+       DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);  /* compact mode: CPUs already recorded in comp_cpus */
+       int                     *comp_cpus;  /* compact mode: CPU numbers in the order they were first seen */
+       bool                     comp;  /* --compact */
+       struct thread_map       *color_pids;  /* parsed from color_pids_str */
+       const char              *color_pids_str;  /* --color-pids argument */
+       struct cpu_map          *color_cpus;  /* parsed from color_cpus_str */
+       const char              *color_cpus_str;  /* --color-cpus argument */
+       struct cpu_map          *cpus;  /* parsed from cpus_str; when set, other CPUs are not printed */
+       const char              *cpus_str;  /* --cpus argument */
+};
+
 struct perf_sched {
        struct perf_tool tool;
        const char       *sort_order;
@@ -173,6 +190,7 @@ struct perf_sched {
        struct list_head sort_list, cmp_pid;
        bool force;
        bool skip_merge;
+       struct perf_sched_map map;
 };
 
 static u64 get_nsecs(void)
@@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
        return 0;
 }
 
+union map_priv {  /* views the thread priv pointer slot as a boolean flag */
+       void    *ptr;  /* value passed to / read from thread__set_priv() / thread__priv() */
+       bool     color;  /* set when the thread's tid is in map.color_pids */
+};
+
+static bool thread__has_color(struct thread *thread)  /* read back the flag stored by map__findnew_thread() */
+{
+       union map_priv priv = {
+               .ptr = thread__priv(thread),  /* priv slot holds the flag, not a real pointer */
+       };
+
+       return priv.color;
+}
+
+static struct thread*  /* machine__findnew_thread() plus colour tagging through the priv slot */
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+       struct thread *thread = machine__findnew_thread(machine, pid, tid);
+       union map_priv priv = {
+               .color = false,
+       };
+
+       if (!sched->map.color_pids || !thread || thread__priv(thread))  /* nothing to tag, lookup failed, or priv already non-NULL */
+               return thread;
+
+       if (thread_map__has(sched->map.color_pids, tid))
+               priv.color = true;
+
+       thread__set_priv(thread, priv.ptr);  /* stores the flag through the pointer view of the union */
+       return thread;
+}
+
 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
                            struct perf_sample *sample, struct machine *machine)
 {
@@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
        int new_shortname;
        u64 timestamp0, timestamp = sample->time;
        s64 delta;
-       int cpu, this_cpu = sample->cpu;
+       int i, this_cpu = sample->cpu;
+       int cpus_nr;
+       bool new_cpu = false;
+       const char *color = PERF_COLOR_NORMAL;
 
        BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
 
        if (this_cpu > sched->max_cpu)
                sched->max_cpu = this_cpu;
 
+       if (sched->map.comp) {
+               cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+               if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+                       sched->map.comp_cpus[cpus_nr++] = this_cpu;
+                       new_cpu = true;
+               }
+       } else
+               cpus_nr = sched->max_cpu;
+
        timestamp0 = sched->cpu_last_switched[this_cpu];
        sched->cpu_last_switched[this_cpu] = timestamp;
        if (timestamp0)
@@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
                return -1;
        }
 
-       sched_in = machine__findnew_thread(machine, -1, next_pid);
+       sched_in = map__findnew_thread(sched, machine, -1, next_pid);
        if (sched_in == NULL)
                return -1;
 
@@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
                new_shortname = 1;
        }
 
-       for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
+       for (i = 0; i < cpus_nr; i++) {
+               int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+               struct thread *curr_thread = sched->curr_thread[cpu];
+               const char *pid_color = color;
+               const char *cpu_color = color;
+
+               if (curr_thread && thread__has_color(curr_thread))
+                       pid_color = COLOR_PIDS;
+
+               if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+                       continue;
+
+               if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+                       cpu_color = COLOR_CPUS;
+
                if (cpu != this_cpu)
-                       printf(" ");
+                       color_fprintf(stdout, cpu_color, " ");
                else
-                       printf("*");
+                       color_fprintf(stdout, cpu_color, "*");
 
                if (sched->curr_thread[cpu])
-                       printf("%2s ", sched->curr_thread[cpu]->shortname);
+                       color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
                else
-                       printf("   ");
+                       color_fprintf(stdout, color, "   ");
        }
 
-       printf("  %12.6f secs ", (double)timestamp/1e9);
+       if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+               goto out;
+
+       color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp/1e9);
        if (new_shortname) {
-               printf("%s => %s:%d\n",
+               const char *pid_color = color;
+
+               if (thread__has_color(sched_in))
+                       pid_color = COLOR_PIDS;
+
+               color_fprintf(stdout, pid_color, "%s => %s:%d",
                       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
-       } else {
-               printf("\n");
        }
 
+       if (sched->map.comp && new_cpu)
+               color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+       color_fprintf(stdout, color, "\n");
+
        thread__put(sched_in);
 
        return 0;
@@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched)
        return 0;
 }
 
+static int setup_map_cpus(struct perf_sched *sched)  /* init max_cpu, compact-mode table and the --cpus filter; 0 on success, -1 on error */
+{
+       struct cpu_map *map;
+
+       sched->max_cpu  = sysconf(_SC_NPROCESSORS_CONF);
+
+       if (sched->map.comp) {
+               sched->map.comp_cpus = zalloc(MAX_CPUS * sizeof(int));  /* one slot per distinct traced CPU: bounded by MAX_CPUS, not by this host's CPU count */
+               if (!sched->map.comp_cpus)
+                       return -1;
+       }
+
+       if (!sched->map.cpus_str)  /* no --cpus given: every CPU is displayed */
+               return 0;
+
+       map = cpu_map__new(sched->map.cpus_str);
+       if (!map) {
+               pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+               return -1;
+       }
+
+       sched->map.cpus = map;
+       return 0;
+}
+
+static int setup_color_pids(struct perf_sched *sched)  /* parse --color-pids into map.color_pids; 0 on success, -1 on error */
+{
+       struct thread_map *map;
+
+       if (!sched->map.color_pids_str)  /* option not given: nothing to set up */
+               return 0;
+
+       map = thread_map__new_by_tid_str(sched->map.color_pids_str);
+       if (!map) {
+               pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+               return -1;
+       }
+
+       sched->map.color_pids = map;
+       return 0;
+}
+
+static int setup_color_cpus(struct perf_sched *sched)  /* parse --color-cpus into map.color_cpus; 0 on success, -1 on error */
+{
+       struct cpu_map *map;
+
+       if (!sched->map.color_cpus_str)  /* option not given: nothing to set up */
+               return 0;
+
+       map = cpu_map__new(sched->map.color_cpus_str);
+       if (!map) {
+               pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);  /* a CPU list failed to parse here, not a thread list */
+               return -1;
+       }
+
+       sched->map.color_cpus = map;
+       return 0;
+}
+
 static int perf_sched__map(struct perf_sched *sched)
 {
-       sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+       if (setup_map_cpus(sched))
+               return -1;
+
+       if (setup_color_pids(sched))
+               return -1;
+
+       if (setup_color_cpus(sched))
+               return -1;
 
        setup_pager();
        if (perf_sched__read_events(sched))
@@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                    "dump raw trace in ASCII"),
        OPT_END()
        };
+       const struct option map_options[] = {
+       OPT_BOOLEAN(0, "compact", &sched.map.comp,
+                   "map output in compact mode"),
+       OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+                  "highlight given pids in map"),
+       OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+                    "highlight given CPUs in map"),
+       OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+                    "display given CPUs in map"),
+       OPT_END()
+       };
        const char * const latency_usage[] = {
                "perf sched latency [<options>]",
                NULL
@@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                "perf sched replay [<options>]",
                NULL
        };
+       const char * const map_usage[] = {
+               "perf sched map [<options>]",
+               NULL
+       };
        const char *const sched_subcommands[] = { "record", "latency", "map",
                                                  "replay", "script", NULL };
        const char *sched_usage[] = {
@@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
                setup_sorting(&sched, latency_options, latency_usage);
                return perf_sched__lat(&sched);
        } else if (!strcmp(argv[0], "map")) {
+               if (argc) {
+                       argc = parse_options(argc, argv, map_options, map_usage, 0);
+                       if (argc)
+                               usage_with_options(map_usage, map_options);
+               }
                sched.tp_handler = &map_ops;
                setup_sorting(&sched, latency_options, latency_usage);
                return perf_sched__map(&sched);
index 3770c3dffe5e141e6c3af2736b9a3e0d41c81cc4..efca81679bb314624b88d024c52b63f2fc54729c 100644 (file)
@@ -22,6 +22,7 @@
 #include "util/thread_map.h"
 #include "util/stat.h"
 #include <linux/bitmap.h>
+#include <linux/stringify.h>
 #include "asm/bug.h"
 #include "util/mem-events.h"
 
@@ -317,19 +318,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
 
        output[type].print_ip_opts = 0;
        if (PRINT_FIELD(IP))
-               output[type].print_ip_opts |= PRINT_IP_OPT_IP;
+               output[type].print_ip_opts |= EVSEL__PRINT_IP;
 
        if (PRINT_FIELD(SYM))
-               output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
+               output[type].print_ip_opts |= EVSEL__PRINT_SYM;
 
        if (PRINT_FIELD(DSO))
-               output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
+               output[type].print_ip_opts |= EVSEL__PRINT_DSO;
 
        if (PRINT_FIELD(SYMOFFSET))
-               output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+               output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
 
        if (PRINT_FIELD(SRCLINE))
-               output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
+               output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
 }
 
 /*
@@ -569,18 +570,23 @@ static void print_sample_bts(struct perf_sample *sample,
        /* print branch_from information */
        if (PRINT_FIELD(IP)) {
                unsigned int print_opts = output[attr->type].print_ip_opts;
+               struct callchain_cursor *cursor = NULL;
 
-               if (symbol_conf.use_callchain && sample->callchain) {
-                       printf("\n");
-               } else {
-                       printf(" ");
-                       if (print_opts & PRINT_IP_OPT_SRCLINE) {
+               if (symbol_conf.use_callchain && sample->callchain &&
+                   thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+                                             sample, NULL, NULL, scripting_max_stack) == 0)
+                       cursor = &callchain_cursor;
+
+               if (cursor == NULL) {
+                       putchar(' ');
+                       if (print_opts & EVSEL__PRINT_SRCLINE) {
                                print_srcline_last = true;
-                               print_opts &= ~PRINT_IP_OPT_SRCLINE;
+                               print_opts &= ~EVSEL__PRINT_SRCLINE;
                        }
-               }
-               perf_evsel__print_ip(evsel, sample, al, print_opts,
-                                    scripting_max_stack);
+               } else
+                       putchar('\n');
+
+               sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
        }
 
        /* print branch_to information */
@@ -783,14 +789,15 @@ static void process_event(struct perf_script *script,
                printf("%16" PRIu64, sample->weight);
 
        if (PRINT_FIELD(IP)) {
-               if (!symbol_conf.use_callchain)
-                       printf(" ");
-               else
-                       printf("\n");
+               struct callchain_cursor *cursor = NULL;
+
+               if (symbol_conf.use_callchain && sample->callchain &&
+                   thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+                                             sample, NULL, NULL, scripting_max_stack) == 0)
+                       cursor = &callchain_cursor;
 
-               perf_evsel__print_ip(evsel, sample, al,
-                                    output[attr->type].print_ip_opts,
-                                    scripting_max_stack);
+               putchar(cursor ? '\n' : ' ');
+               sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
        }
 
        if (PRINT_FIELD(IREGS))
@@ -1415,21 +1422,19 @@ static int is_directory(const char *base_path, const struct dirent *dent)
        return S_ISDIR(st.st_mode);
 }
 
-#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
-       while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&     \
-              lang_next)                                               \
-               if ((lang_dirent.d_type == DT_DIR ||                    \
-                    (lang_dirent.d_type == DT_UNKNOWN &&               \
-                     is_directory(scripts_path, &lang_dirent))) &&     \
-                   (strcmp(lang_dirent.d_name, ".")) &&                \
-                   (strcmp(lang_dirent.d_name, "..")))
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent)          \
+       while ((lang_dirent = readdir(scripts_dir)) != NULL)            \
+               if ((lang_dirent->d_type == DT_DIR ||                   \
+                    (lang_dirent->d_type == DT_UNKNOWN &&              \
+                     is_directory(scripts_path, lang_dirent))) &&      \
+                   (strcmp(lang_dirent->d_name, ".")) &&               \
+                   (strcmp(lang_dirent->d_name, "..")))
 
-#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
-       while (!readdir_r(lang_dir, &script_dirent, &script_next) &&    \
-              script_next)                                             \
-               if (script_dirent.d_type != DT_DIR &&                   \
-                   (script_dirent.d_type != DT_UNKNOWN ||              \
-                    !is_directory(lang_path, &script_dirent)))
+#define for_each_script(lang_path, lang_dir, script_dirent)            \
+       while ((script_dirent = readdir(lang_dir)) != NULL)             \
+               if (script_dirent->d_type != DT_DIR &&                  \
+                   (script_dirent->d_type != DT_UNKNOWN ||             \
+                    !is_directory(lang_path, script_dirent)))
 
 
 #define RECORD_SUFFIX                  "-record"
@@ -1575,7 +1580,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
                                  const char *s __maybe_unused,
                                  int unset __maybe_unused)
 {
-       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+       struct dirent *script_dirent, *lang_dirent;
        char scripts_path[MAXPATHLEN];
        DIR *scripts_dir, *lang_dir;
        char script_path[MAXPATHLEN];
@@ -1590,19 +1595,19 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
        if (!scripts_dir)
                return -1;
 
-       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
-                        lang_dirent.d_name);
+                        lang_dirent->d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
-                       script_root = get_script_root(&script_dirent, REPORT_SUFFIX);
+               for_each_script(lang_path, lang_dir, script_dirent) {
+                       script_root = get_script_root(script_dirent, REPORT_SUFFIX);
                        if (script_root) {
                                desc = script_desc__findnew(script_root);
                                snprintf(script_path, MAXPATHLEN, "%s/%s",
-                                        lang_path, script_dirent.d_name);
+                                        lang_path, script_dirent->d_name);
                                read_script_info(desc, script_path);
                                free(script_root);
                        }
@@ -1690,7 +1695,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
  */
 int find_scripts(char **scripts_array, char **scripts_path_array)
 {
-       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+       struct dirent *script_dirent, *lang_dirent;
        char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
        DIR *scripts_dir, *lang_dir;
        struct perf_session *session;
@@ -1713,9 +1718,9 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
                return -1;
        }
 
-       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
-                        lang_dirent.d_name);
+                        lang_dirent->d_name);
 #ifdef NO_LIBPERL
                if (strstr(lang_path, "perl"))
                        continue;
@@ -1729,16 +1734,16 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
+               for_each_script(lang_path, lang_dir, script_dirent) {
                        /* Skip those real time scripts: xxxtop.p[yl] */
-                       if (strstr(script_dirent.d_name, "top."))
+                       if (strstr(script_dirent->d_name, "top."))
                                continue;
                        sprintf(scripts_path_array[i], "%s/%s", lang_path,
-                               script_dirent.d_name);
-                       temp = strchr(script_dirent.d_name, '.');
+                               script_dirent->d_name);
+                       temp = strchr(script_dirent->d_name, '.');
                        snprintf(scripts_array[i],
-                               (temp - script_dirent.d_name) + 1,
-                               "%s", script_dirent.d_name);
+                               (temp - script_dirent->d_name) + 1,
+                               "%s", script_dirent->d_name);
 
                        if (check_ev_match(lang_path,
                                        scripts_array[i], session))
@@ -1756,7 +1761,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
 
 static char *get_script_path(const char *script_root, const char *suffix)
 {
-       struct dirent *script_next, *lang_next, script_dirent, lang_dirent;
+       struct dirent *script_dirent, *lang_dirent;
        char scripts_path[MAXPATHLEN];
        char script_path[MAXPATHLEN];
        DIR *scripts_dir, *lang_dir;
@@ -1769,21 +1774,21 @@ static char *get_script_path(const char *script_root, const char *suffix)
        if (!scripts_dir)
                return NULL;
 
-       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
-                        lang_dirent.d_name);
+                        lang_dirent->d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
-                       __script_root = get_script_root(&script_dirent, suffix);
+               for_each_script(lang_path, lang_dir, script_dirent) {
+                       __script_root = get_script_root(script_dirent, suffix);
                        if (__script_root && !strcmp(script_root, __script_root)) {
                                free(__script_root);
                                closedir(lang_dir);
                                closedir(scripts_dir);
                                snprintf(script_path, MAXPATHLEN, "%s/%s",
-                                        lang_path, script_dirent.d_name);
+                                        lang_path, script_dirent->d_name);
                                return strdup(script_path);
                        }
                        free(__script_root);
@@ -1961,6 +1966,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                        .exit            = perf_event__process_exit,
                        .fork            = perf_event__process_fork,
                        .attr            = process_attr,
+                       .event_update   = perf_event__process_event_update,
                        .tracing_data    = perf_event__process_tracing_data,
                        .build_id        = perf_event__process_build_id,
                        .id_index        = perf_event__process_id_index,
@@ -2022,6 +2028,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                   "only consider symbols in these pids"),
        OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
                   "only consider symbols in these tids"),
+       OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+                    "Set the maximum stack depth when parsing the callchain, "
+                    "anything beyond the specified depth will be ignored. "
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_BOOLEAN('I', "show-info", &show_full_info,
                    "display extended information from perf.data file"),
        OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2057,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
 
+       scripting_max_stack = sysctl_perf_event_max_stack;
+
        setup_scripting();
 
        argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
index 1f19f2f999c841b9da140e10bcaf5e6e0f41ee6b..5645a8361de6f494b1f9b56475dec3a5dd959d9d 100644 (file)
@@ -298,6 +298,14 @@ static int read_counter(struct perf_evsel *counter)
                                        return -1;
                                }
                        }
+
+                       if (verbose > 1) {
+                               fprintf(stat_config.output,
+                                       "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+                                               perf_evsel__name(counter),
+                                               cpu,
+                                               count->val, count->ena, count->run);
+                       }
                }
        }
 
index 833214979c4f49c7e211ecef49d8e4833635ad41..1793da5856762afbcc1ad2313d3e69e557089606 100644 (file)
@@ -688,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
        struct hist_entry *he = iter->he;
        struct perf_evsel *evsel = iter->evsel;
 
-       if (sort__has_sym && single)
+       if (perf_hpp_list.sym && single)
                perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
 
        hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
@@ -886,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top)
        struct perf_evlist *evlist = top->evlist;
        struct record_opts *opts = &top->record_opts;
 
-       perf_evlist__config(evlist, opts);
+       perf_evlist__config(evlist, opts, &callchain_param);
 
        evlist__for_each(evlist, counter) {
 try_again:
@@ -917,15 +917,15 @@ out_err:
        return -1;
 }
 
-static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
 {
-       if (!sort__has_sym) {
-               if (symbol_conf.use_callchain) {
+       if (!perf_hpp_list.sym) {
+               if (callchain->enabled) {
                        ui__error("Selected -g but \"sym\" not present in --sort/-s.");
                        return -EINVAL;
                }
-       } else if (callchain_param.mode != CHAIN_NONE) {
-               if (callchain_register_param(&callchain_param) < 0) {
+       } else if (callchain->mode != CHAIN_NONE) {
+               if (callchain_register_param(callchain) < 0) {
                        ui__error("Can't register callchain params.\n");
                        return -EINVAL;
                }
@@ -952,7 +952,7 @@ static int __cmd_top(struct perf_top *top)
                        goto out_delete;
        }
 
-       ret = perf_top__setup_sample_type(top);
+       ret = callchain_param__setup_sample_type(&callchain_param);
        if (ret)
                goto out_delete;
 
@@ -962,7 +962,7 @@ static int __cmd_top(struct perf_top *top)
        machine__synthesize_threads(&top->session->machines.host, &opts->target,
                                    top->evlist->threads, false, opts->proc_map_timeout);
 
-       if (sort__has_socket) {
+       if (perf_hpp_list.socket) {
                ret = perf_env__read_cpu_topology_map(&perf_env);
                if (ret < 0)
                        goto out_err_cpu_topo;
@@ -1045,18 +1045,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset)
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-       struct record_opts *record = (struct record_opts *)opt->value;
+       struct callchain_param *callchain = opt->value;
 
-       record->callgraph_set = true;
-       callchain_param.enabled = !unset;
-       callchain_param.record_mode = CALLCHAIN_FP;
+       callchain->enabled = !unset;
+       callchain->record_mode = CALLCHAIN_FP;
 
        /*
         * --no-call-graph
         */
        if (unset) {
                symbol_conf.use_callchain = false;
-               callchain_param.record_mode = CALLCHAIN_NONE;
+               callchain->record_mode = CALLCHAIN_NONE;
                return 0;
        }
 
@@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                        },
                        .proc_map_timeout    = 500,
                },
-               .max_stack           = PERF_MAX_STACK_DEPTH,
+               .max_stack           = sysctl_perf_event_max_stack,
                .sym_pcnt_filter     = 5,
        };
        struct record_opts *opts = &top.record_opts;
@@ -1162,17 +1161,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                   "output field(s): overhead, period, sample plus all of sort keys"),
        OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
                    "Show a column with the number of samples"),
-       OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
+       OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
                           NULL, "enables call-graph recording and display",
                           &callchain_opt),
-       OPT_CALLBACK(0, "call-graph", &top.record_opts,
+       OPT_CALLBACK(0, "call-graph", &callchain_param,
                     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
                     top_callchain_help, &parse_callchain_opt),
        OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
                    "Accumulate callchains of children and show total overhead as well"),
        OPT_INTEGER(0, "max-stack", &top.max_stack,
                    "Set the maximum stack depth when parsing the callchain. "
-                   "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+                   "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
                   "ignore callees of these functions in call graphs",
                   report_parse_ignore_callees_opt),
@@ -1256,7 +1255,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
        sort__mode = SORT_MODE__TOP;
        /* display thread wants entries to be collapsed in a different tree */
-       sort__need_collapse = 1;
+       perf_hpp_list.need_collapse = 1;
 
        if (top.use_stdio)
                use_browser = 0;
@@ -1312,7 +1311,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 
        top.sym_evsel = perf_evlist__first(top.evlist);
 
-       if (!symbol_conf.use_callchain) {
+       if (!callchain_param.enabled) {
                symbol_conf.cumulate_callchain = false;
                perf_hpp__cancel_cumulate();
        }
index 93ac724fb635ce71236615aa80014b67a740fa08..709963740f9a52ae98e5096848e6fe351584a87b 100644 (file)
 #include "trace-event.h"
 #include "util/parse-events.h"
 #include "util/bpf-loader.h"
+#include "callchain.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
 
-#include <libaudit.h>
+#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
 #include <stdlib.h>
-#include <sys/mman.h>
-#include <linux/futex.h>
 #include <linux/err.h>
-
-/* For older distros: */
-#ifndef MAP_STACK
-# define MAP_STACK             0x20000
-#endif
-
-#ifndef MADV_HWPOISON
-# define MADV_HWPOISON         100
-
-#endif
-
-#ifndef MADV_MERGEABLE
-# define MADV_MERGEABLE                12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-# define MADV_UNMERGEABLE      13
-#endif
-
-#ifndef EFD_SEMAPHORE
-# define EFD_SEMAPHORE         1
-#endif
-
-#ifndef EFD_NONBLOCK
-# define EFD_NONBLOCK          00004000
-#endif
-
-#ifndef EFD_CLOEXEC
-# define EFD_CLOEXEC           02000000
-#endif
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <linux/audit.h>
+#include <sys/ptrace.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
 
 #ifndef O_CLOEXEC
 # define O_CLOEXEC             02000000
 #endif
 
-#ifndef SOCK_DCCP
-# define SOCK_DCCP             6
-#endif
-
-#ifndef SOCK_CLOEXEC
-# define SOCK_CLOEXEC          02000000
-#endif
-
-#ifndef SOCK_NONBLOCK
-# define SOCK_NONBLOCK         00004000
-#endif
-
-#ifndef MSG_CMSG_CLOEXEC
-# define MSG_CMSG_CLOEXEC      0x40000000
-#endif
-
-#ifndef PERF_FLAG_FD_NO_GROUP
-# define PERF_FLAG_FD_NO_GROUP         (1UL << 0)
-#endif
-
-#ifndef PERF_FLAG_FD_OUTPUT
-# define PERF_FLAG_FD_OUTPUT           (1UL << 1)
-#endif
-
-#ifndef PERF_FLAG_PID_CGROUP
-# define PERF_FLAG_PID_CGROUP          (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#endif
-
-#ifndef PERF_FLAG_FD_CLOEXEC
-# define PERF_FLAG_FD_CLOEXEC          (1UL << 3) /* O_CLOEXEC */
-#endif
-
+struct trace {
+       struct perf_tool        tool;
+       struct syscalltbl       *sctbl;
+       struct {
+               int             max;
+               struct syscall  *table;
+               struct {
+                       struct perf_evsel *sys_enter,
+                                         *sys_exit;
+               }               events;
+       } syscalls;
+       struct record_opts      opts;
+       struct perf_evlist      *evlist;
+       struct machine          *host;
+       struct thread           *current;
+       u64                     base_time;
+       FILE                    *output;
+       unsigned long           nr_events;
+       struct strlist          *ev_qualifier;
+       struct {
+               size_t          nr;
+               int             *entries;
+       }                       ev_qualifier_ids;
+       struct intlist          *tid_list;
+       struct intlist          *pid_list;
+       struct {
+               size_t          nr;
+               pid_t           *entries;
+       }                       filter_pids;
+       double                  duration_filter;
+       double                  runtime_ms;
+       struct {
+               u64             vfs_getname,
+                               proc_getname;
+       } stats;
+       unsigned int            max_stack;
+       unsigned int            min_stack;
+       bool                    not_ev_qualifier;
+       bool                    live;
+       bool                    full_time;
+       bool                    sched;
+       bool                    multiple_threads;
+       bool                    summary;
+       bool                    summary_only;
+       bool                    show_comm;
+       bool                    show_tool_stats;
+       bool                    trace_syscalls;
+       bool                    kernel_syscallchains;
+       bool                    force;
+       bool                    vfs_getname;
+       int                     trace_pgfaults;
+       int                     open_id;
+};
 
 struct tp_field {
        int offset;
@@ -371,147 +369,6 @@ static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
 
 #define SCA_INT syscall_arg__scnprintf_int
 
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
-                                              struct syscall_arg *arg)
-{
-       int printed = 0, prot = arg->val;
-
-       if (prot == PROT_NONE)
-               return scnprintf(bf, size, "NONE");
-#define        P_MMAP_PROT(n) \
-       if (prot & PROT_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               prot &= ~PROT_##n; \
-       }
-
-       P_MMAP_PROT(EXEC);
-       P_MMAP_PROT(READ);
-       P_MMAP_PROT(WRITE);
-#ifdef PROT_SEM
-       P_MMAP_PROT(SEM);
-#endif
-       P_MMAP_PROT(GROWSDOWN);
-       P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
-       if (prot)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
-       return printed;
-}
-
-#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
-
-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
-                                               struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-#define        P_MMAP_FLAG(n) \
-       if (flags & MAP_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MAP_##n; \
-       }
-
-       P_MMAP_FLAG(SHARED);
-       P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-       P_MMAP_FLAG(32BIT);
-#endif
-       P_MMAP_FLAG(ANONYMOUS);
-       P_MMAP_FLAG(DENYWRITE);
-       P_MMAP_FLAG(EXECUTABLE);
-       P_MMAP_FLAG(FILE);
-       P_MMAP_FLAG(FIXED);
-       P_MMAP_FLAG(GROWSDOWN);
-#ifdef MAP_HUGETLB
-       P_MMAP_FLAG(HUGETLB);
-#endif
-       P_MMAP_FLAG(LOCKED);
-       P_MMAP_FLAG(NONBLOCK);
-       P_MMAP_FLAG(NORESERVE);
-       P_MMAP_FLAG(POPULATE);
-       P_MMAP_FLAG(STACK);
-#ifdef MAP_UNINITIALIZED
-       P_MMAP_FLAG(UNINITIALIZED);
-#endif
-#undef P_MMAP_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
-                                                 struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-#define P_MREMAP_FLAG(n) \
-       if (flags & MREMAP_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MREMAP_##n; \
-       }
-
-       P_MREMAP_FLAG(MAYMOVE);
-#ifdef MREMAP_FIXED
-       P_MREMAP_FLAG(FIXED);
-#endif
-#undef P_MREMAP_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
-
-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
-                                                     struct syscall_arg *arg)
-{
-       int behavior = arg->val;
-
-       switch (behavior) {
-#define        P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
-       P_MADV_BHV(NORMAL);
-       P_MADV_BHV(RANDOM);
-       P_MADV_BHV(SEQUENTIAL);
-       P_MADV_BHV(WILLNEED);
-       P_MADV_BHV(DONTNEED);
-       P_MADV_BHV(REMOVE);
-       P_MADV_BHV(DONTFORK);
-       P_MADV_BHV(DOFORK);
-       P_MADV_BHV(HWPOISON);
-#ifdef MADV_SOFT_OFFLINE
-       P_MADV_BHV(SOFT_OFFLINE);
-#endif
-       P_MADV_BHV(MERGEABLE);
-       P_MADV_BHV(UNMERGEABLE);
-#ifdef MADV_HUGEPAGE
-       P_MADV_BHV(HUGEPAGE);
-#endif
-#ifdef MADV_NOHUGEPAGE
-       P_MADV_BHV(NOHUGEPAGE);
-#endif
-#ifdef MADV_DONTDUMP
-       P_MADV_BHV(DONTDUMP);
-#endif
-#ifdef MADV_DODUMP
-       P_MADV_BHV(DODUMP);
-#endif
-#undef P_MADV_PHV
-       default: break;
-       }
-
-       return scnprintf(bf, size, "%#x", behavior);
-}
-
-#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
-
 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
                                           struct syscall_arg *arg)
 {
@@ -543,49 +400,6 @@ static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
 
 #define SCA_FLOCK syscall_arg__scnprintf_flock
 
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
-{
-       enum syscall_futex_args {
-               SCF_UADDR   = (1 << 0),
-               SCF_OP      = (1 << 1),
-               SCF_VAL     = (1 << 2),
-               SCF_TIMEOUT = (1 << 3),
-               SCF_UADDR2  = (1 << 4),
-               SCF_VAL3    = (1 << 5),
-       };
-       int op = arg->val;
-       int cmd = op & FUTEX_CMD_MASK;
-       size_t printed = 0;
-
-       switch (cmd) {
-#define        P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
-       P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
-       P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
-       P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
-       P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
-       P_FUTEX_OP(WAKE_OP);                                                      break;
-       P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-       P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
-       P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
-       P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
-       P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
-       default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
-       }
-
-       if (op & FUTEX_PRIVATE_FLAG)
-               printed += scnprintf(bf + printed, size - printed, "|PRIV");
-
-       if (op & FUTEX_CLOCK_REALTIME)
-               printed += scnprintf(bf + printed, size - printed, "|CLKRT");
-
-       return printed;
-}
-
-#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
-
 static const char *bpf_cmd[] = {
        "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
        "MAP_GET_NEXT_KEY", "PROG_LOAD",
@@ -652,110 +466,6 @@ static const char *socket_families[] = {
 };
 static DEFINE_STRARRAY(socket_families);
 
-#ifndef SOCK_TYPE_MASK
-#define SOCK_TYPE_MASK 0xf
-#endif
-
-static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
-                                                     struct syscall_arg *arg)
-{
-       size_t printed;
-       int type = arg->val,
-           flags = type & ~SOCK_TYPE_MASK;
-
-       type &= SOCK_TYPE_MASK;
-       /*
-        * Can't use a strarray, MIPS may override for ABI reasons.
-        */
-       switch (type) {
-#define        P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
-       P_SK_TYPE(STREAM);
-       P_SK_TYPE(DGRAM);
-       P_SK_TYPE(RAW);
-       P_SK_TYPE(RDM);
-       P_SK_TYPE(SEQPACKET);
-       P_SK_TYPE(DCCP);
-       P_SK_TYPE(PACKET);
-#undef P_SK_TYPE
-       default:
-               printed = scnprintf(bf, size, "%#x", type);
-       }
-
-#define        P_SK_FLAG(n) \
-       if (flags & SOCK_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
-               flags &= ~SOCK_##n; \
-       }
-
-       P_SK_FLAG(CLOEXEC);
-       P_SK_FLAG(NONBLOCK);
-#undef P_SK_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
-
-       return printed;
-}
-
-#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
-
-#ifndef MSG_PROBE
-#define MSG_PROBE           0x10
-#endif
-#ifndef MSG_WAITFORONE
-#define MSG_WAITFORONE 0x10000
-#endif
-#ifndef MSG_SENDPAGE_NOTLAST
-#define MSG_SENDPAGE_NOTLAST 0x20000
-#endif
-#ifndef MSG_FASTOPEN
-#define MSG_FASTOPEN        0x20000000
-#endif
-
-static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
-                                              struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
-
-       if (flags == 0)
-               return scnprintf(bf, size, "NONE");
-#define        P_MSG_FLAG(n) \
-       if (flags & MSG_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~MSG_##n; \
-       }
-
-       P_MSG_FLAG(OOB);
-       P_MSG_FLAG(PEEK);
-       P_MSG_FLAG(DONTROUTE);
-       P_MSG_FLAG(TRYHARD);
-       P_MSG_FLAG(CTRUNC);
-       P_MSG_FLAG(PROBE);
-       P_MSG_FLAG(TRUNC);
-       P_MSG_FLAG(DONTWAIT);
-       P_MSG_FLAG(EOR);
-       P_MSG_FLAG(WAITALL);
-       P_MSG_FLAG(FIN);
-       P_MSG_FLAG(SYN);
-       P_MSG_FLAG(CONFIRM);
-       P_MSG_FLAG(RST);
-       P_MSG_FLAG(ERRQUEUE);
-       P_MSG_FLAG(NOSIGNAL);
-       P_MSG_FLAG(MORE);
-       P_MSG_FLAG(WAITFORONE);
-       P_MSG_FLAG(SENDPAGE_NOTLAST);
-       P_MSG_FLAG(FASTOPEN);
-       P_MSG_FLAG(CMSG_CLOEXEC);
-#undef P_MSG_FLAG
-
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-       return printed;
-}
-
-#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
-
 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
                                                 struct syscall_arg *arg)
 {
@@ -788,52 +498,19 @@ static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
 
 #define SCA_FILENAME syscall_arg__scnprintf_filename
 
-static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
-                                              struct syscall_arg *arg)
+static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
+                                               struct syscall_arg *arg)
 {
        int printed = 0, flags = arg->val;
 
-       if (!(flags & O_CREAT))
-               arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
-
-       if (flags == 0)
-               return scnprintf(bf, size, "RDONLY");
 #define        P_FLAG(n) \
        if (flags & O_##n) { \
                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
                flags &= ~O_##n; \
        }
 
-       P_FLAG(APPEND);
-       P_FLAG(ASYNC);
        P_FLAG(CLOEXEC);
-       P_FLAG(CREAT);
-       P_FLAG(DIRECT);
-       P_FLAG(DIRECTORY);
-       P_FLAG(EXCL);
-       P_FLAG(LARGEFILE);
-       P_FLAG(NOATIME);
-       P_FLAG(NOCTTY);
-#ifdef O_NONBLOCK
        P_FLAG(NONBLOCK);
-#elif O_NDELAY
-       P_FLAG(NDELAY);
-#endif
-#ifdef O_PATH
-       P_FLAG(PATH);
-#endif
-       P_FLAG(RDWR);
-#ifdef O_DSYNC
-       if ((flags & O_SYNC) == O_SYNC)
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
-       else {
-               P_FLAG(DSYNC);
-       }
-#else
-       P_FLAG(SYNC);
-#endif
-       P_FLAG(TRUNC);
-       P_FLAG(WRONLY);
 #undef P_FLAG
 
        if (flags)
@@ -842,52 +519,76 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
        return printed;
 }
 
-#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
+#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 
-static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
-                                               struct syscall_arg *arg)
-{
-       int printed = 0, flags = arg->val;
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches.
+ */
+#define TCGETS         0x5401
 
-       if (flags == 0)
-               return 0;
+static const char *tioctls[] = {
+       "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
+       "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
+       "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
+       "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
+       "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
+       "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
+       "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
+       "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
+       "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
+       "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
+       "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
+       [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
+       "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
+       "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
+       "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
+};
 
-#define        P_FLAG(n) \
-       if (flags & PERF_FLAG_##n) { \
-               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~PERF_FLAG_##n; \
-       }
+static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
+#endif /* defined(__i386__) || defined(__x86_64__) */
 
-       P_FLAG(FD_NO_GROUP);
-       P_FLAG(FD_OUTPUT);
-       P_FLAG(PID_CGROUP);
-       P_FLAG(FD_CLOEXEC);
-#undef P_FLAG
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
 
-       if (flags)
-               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+       int op = arg->val;
+       size_t printed = 0;
+
+       switch (op) {
+#define        P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
+       P_SECCOMP_SET_MODE_OP(STRICT);
+       P_SECCOMP_SET_MODE_OP(FILTER);
+#undef P_SECCOMP_SET_MODE_OP
+       default: printed = scnprintf(bf, size, "%#x", op);                        break;
+       }
 
        return printed;
 }
 
-#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+#define SCA_SECCOMP_OP  syscall_arg__scnprintf_seccomp_op
 
-static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
                                                   struct syscall_arg *arg)
 {
        int printed = 0, flags = arg->val;
 
-       if (flags == 0)
-               return scnprintf(bf, size, "NONE");
 #define        P_FLAG(n) \
-       if (flags & EFD_##n) { \
+       if (flags & SECCOMP_FILTER_FLAG_##n) { \
                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~EFD_##n; \
+               flags &= ~SECCOMP_FILTER_FLAG_##n; \
        }
 
-       P_FLAG(SEMAPHORE);
-       P_FLAG(CLOEXEC);
-       P_FLAG(NONBLOCK);
+       P_FLAG(TSYNC);
 #undef P_FLAG
 
        if (flags)
@@ -896,20 +597,27 @@ static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
        return printed;
 }
 
-#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
+#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
 
-static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
-                                               struct syscall_arg *arg)
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK  0x0001
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM    0x0002
+#endif
+
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
+                                                  struct syscall_arg *arg)
 {
        int printed = 0, flags = arg->val;
 
 #define        P_FLAG(n) \
-       if (flags & O_##n) { \
+       if (flags & GRND_##n) { \
                printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-               flags &= ~O_##n; \
+               flags &= ~GRND_##n; \
        }
 
-       P_FLAG(CLOEXEC);
+       P_FLAG(RANDOM);
        P_FLAG(NONBLOCK);
 #undef P_FLAG
 
@@ -919,98 +627,32 @@ static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
        return printed;
 }
 
-#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
-
-static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
-{
-       int sig = arg->val;
-
-       switch (sig) {
-#define        P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
-       P_SIGNUM(HUP);
-       P_SIGNUM(INT);
-       P_SIGNUM(QUIT);
-       P_SIGNUM(ILL);
-       P_SIGNUM(TRAP);
-       P_SIGNUM(ABRT);
-       P_SIGNUM(BUS);
-       P_SIGNUM(FPE);
-       P_SIGNUM(KILL);
-       P_SIGNUM(USR1);
-       P_SIGNUM(SEGV);
-       P_SIGNUM(USR2);
-       P_SIGNUM(PIPE);
-       P_SIGNUM(ALRM);
-       P_SIGNUM(TERM);
-       P_SIGNUM(CHLD);
-       P_SIGNUM(CONT);
-       P_SIGNUM(STOP);
-       P_SIGNUM(TSTP);
-       P_SIGNUM(TTIN);
-       P_SIGNUM(TTOU);
-       P_SIGNUM(URG);
-       P_SIGNUM(XCPU);
-       P_SIGNUM(XFSZ);
-       P_SIGNUM(VTALRM);
-       P_SIGNUM(PROF);
-       P_SIGNUM(WINCH);
-       P_SIGNUM(IO);
-       P_SIGNUM(PWR);
-       P_SIGNUM(SYS);
-#ifdef SIGEMT
-       P_SIGNUM(EMT);
-#endif
-#ifdef SIGSTKFLT
-       P_SIGNUM(STKFLT);
-#endif
-#ifdef SIGSWI
-       P_SIGNUM(SWI);
-#endif
-       default: break;
-       }
-
-       return scnprintf(bf, size, "%#x", sig);
-}
-
-#define SCA_SIGNUM syscall_arg__scnprintf_signum
-
-#if defined(__i386__) || defined(__x86_64__)
-/*
- * FIXME: Make this available to all arches.
- */
-#define TCGETS         0x5401
-
-static const char *tioctls[] = {
-       "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
-       "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
-       "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
-       "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
-       "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
-       "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
-       "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
-       "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
-       "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
-       "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
-       "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
-       [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
-       "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
-       "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
-       "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
-};
-
-static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
-#endif /* defined(__i386__) || defined(__x86_64__) */
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
 
 #define STRARRAY(arg, name, array) \
          .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
          .arg_parm      = { [arg] = &strarray__##array, }
 
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
 static struct syscall_fmt {
        const char *name;
        const char *alias;
        size_t     (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
        void       *arg_parm[6];
        bool       errmsg;
+       bool       errpid;
        bool       timeout;
        bool       hexret;
 } syscall_fmts[] = {
@@ -1028,6 +670,7 @@ static struct syscall_fmt {
        { .name     = "chroot",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
+       { .name     = "clone",      .errpid = true, },
        { .name     = "close",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
        { .name     = "connect",    .errmsg = true, },
@@ -1093,6 +736,11 @@ static struct syscall_fmt {
        { .name     = "getdents64", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+       { .name     = "getpid",     .errpid = true, },
+       { .name     = "getpgid",    .errpid = true, },
+       { .name     = "getppid",    .errpid = true, },
+       { .name     = "getrandom",  .errmsg = true,
+         .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
        { .name     = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "getxattr",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1186,8 +834,7 @@ static struct syscall_fmt {
                             [1] = SCA_FILENAME, /* filename */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "perf_event_open", .errmsg = true,
-         .arg_scnprintf = { [1] = SCA_INT, /* pid */
-                            [2] = SCA_INT, /* cpu */
+         .arg_scnprintf = { [2] = SCA_INT, /* cpu */
                             [3] = SCA_FD,  /* group_fd */
                             [4] = SCA_PERF_FLAGS,  /* flags */ }, },
        { .name     = "pipe2",      .errmsg = true,
@@ -1234,6 +881,11 @@ static struct syscall_fmt {
          .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
        { .name     = "rt_tgsigqueueinfo", .errmsg = true,
          .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+       { .name     = "sched_setscheduler",   .errmsg = true,
+         .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
+       { .name     = "seccomp", .errmsg = true,
+         .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
+                            [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
        { .name     = "select",     .errmsg = true, .timeout = true, },
        { .name     = "sendmmsg",    .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
@@ -1244,7 +896,9 @@ static struct syscall_fmt {
        { .name     = "sendto",     .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */
                             [3] = SCA_MSG_FLAGS, /* flags */ }, },
+       { .name     = "set_tid_address", .errpid = true, },
        { .name     = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+       { .name     = "setpgid",    .errmsg = true, },
        { .name     = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
        { .name     = "setxattr",   .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1287,6 +941,10 @@ static struct syscall_fmt {
          .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
        { .name     = "vmsplice",  .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+       { .name     = "wait4",      .errpid = true,
+         .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
+       { .name     = "waitid",     .errpid = true,
+         .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
        { .name     = "write",      .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
        { .name     = "writev",     .errmsg = true,
@@ -1398,59 +1056,6 @@ fail:
 
 static const size_t trace__entry_str_size = 2048;
 
-struct trace {
-       struct perf_tool        tool;
-       struct {
-               int             machine;
-               int             open_id;
-       }                       audit;
-       struct {
-               int             max;
-               struct syscall  *table;
-               struct {
-                       struct perf_evsel *sys_enter,
-                                         *sys_exit;
-               }               events;
-       } syscalls;
-       struct record_opts      opts;
-       struct perf_evlist      *evlist;
-       struct machine          *host;
-       struct thread           *current;
-       u64                     base_time;
-       FILE                    *output;
-       unsigned long           nr_events;
-       struct strlist          *ev_qualifier;
-       struct {
-               size_t          nr;
-               int             *entries;
-       }                       ev_qualifier_ids;
-       struct intlist          *tid_list;
-       struct intlist          *pid_list;
-       struct {
-               size_t          nr;
-               pid_t           *entries;
-       }                       filter_pids;
-       double                  duration_filter;
-       double                  runtime_ms;
-       struct {
-               u64             vfs_getname,
-                               proc_getname;
-       } stats;
-       bool                    not_ev_qualifier;
-       bool                    live;
-       bool                    full_time;
-       bool                    sched;
-       bool                    multiple_threads;
-       bool                    summary;
-       bool                    summary_only;
-       bool                    show_comm;
-       bool                    show_tool_stats;
-       bool                    trace_syscalls;
-       bool                    force;
-       bool                    vfs_getname;
-       int                     trace_pgfaults;
-};
-
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
 {
        struct thread_trace *ttrace = thread__priv(thread);
@@ -1618,6 +1223,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine,
                color_fprintf(trace->output, PERF_COLOR_RED,
                              "LOST %" PRIu64 " events!\n", event->lost.lost);
                ret = machine__process_lost_event(machine, event, sample);
+               break;
        default:
                ret = machine__process_event(machine, event, sample);
                break;
@@ -1675,6 +1281,10 @@ static int syscall__set_arg_fmts(struct syscall *sc)
                        sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
                else if (field->flags & FIELD_IS_POINTER)
                        sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
+               else if (strcmp(field->type, "pid_t") == 0)
+                       sc->arg_scnprintf[idx] = SCA_PID;
+               else if (strcmp(field->type, "umode_t") == 0)
+                       sc->arg_scnprintf[idx] = SCA_MODE_T;
                ++idx;
        }
 
@@ -1685,7 +1295,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 {
        char tp_name[128];
        struct syscall *sc;
-       const char *name = audit_syscall_to_name(id, trace->audit.machine);
+       const char *name = syscalltbl__name(trace->sctbl, id);
 
        if (name == NULL)
                return -1;
@@ -1760,7 +1370,7 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 
        strlist__for_each(pos, trace->ev_qualifier) {
                const char *sc = pos->s;
-               int id = audit_name_to_syscall(sc, trace->audit.machine);
+               int id = syscalltbl__id(trace->sctbl, sc);
 
                if (id < 0) {
                        if (err == 0) {
@@ -1846,7 +1456,12 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
                                                     "%ld", val);
                        }
                }
-       } else {
+       } else if (IS_ERR(sc->tp_format)) {
+               /*
+                * If we managed to read the tracepoint /format file, then we
+                * may end up not having any args, like with gettid(), so only
+                * print the raw args when we didn't manage to read it.
+                */
                int i = 0;
 
                while (i < 6) {
@@ -1987,7 +1602,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                        goto out_put;
        }
 
-       if (!trace->summary_only)
+       if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
                trace__printf_interrupted_entry(trace, sample);
 
        ttrace->entry_time = sample->time;
@@ -1998,7 +1613,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
                                           args, trace, thread);
 
        if (sc->is_exit) {
-               if (!trace->duration_filter && !trace->summary_only) {
+               if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
                        trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
                        fprintf(trace->output, "%-70s\n", ttrace->entry_str);
                }
@@ -2018,6 +1633,29 @@ out_put:
        return err;
 }
 
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+                                   struct perf_sample *sample,
+                                   struct callchain_cursor *cursor)
+{
+       struct addr_location al;
+
+       if (machine__resolve(trace->host, &al, sample) < 0 ||
+           thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
+               return -1;
+
+       return 0;
+}
+
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+       /* TODO: user-configurable print_opts */
+       const unsigned int print_opts = EVSEL__PRINT_SYM |
+                                       EVSEL__PRINT_DSO |
+                                       EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+       return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+}
+
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
                           union perf_event *event __maybe_unused,
                           struct perf_sample *sample)
@@ -2025,7 +1663,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        long ret;
        u64 duration = 0;
        struct thread *thread;
-       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+       int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
        struct syscall *sc = trace__syscall_info(trace, evsel, id);
        struct thread_trace *ttrace;
 
@@ -2042,7 +1680,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 
        ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-       if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
+       if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
                trace__set_fd_pathname(thread, ret, ttrace->filename.name);
                ttrace->filename.pending_open = false;
                ++trace->stats.vfs_getname;
@@ -2057,6 +1695,15 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        } else if (trace->duration_filter)
                goto out;
 
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out;
+                       callchain_ret = 1;
+               }
+       }
+
        if (trace->summary_only)
                goto out;
 
@@ -2073,7 +1720,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
        if (sc->fmt == NULL) {
 signed_print:
                fprintf(trace->output, ") = %ld", ret);
-       } else if (ret < 0 && sc->fmt->errmsg) {
+       } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
                char bf[STRERR_BUFSIZE];
                const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
                           *e = audit_errno_to_name(-ret);
@@ -2083,10 +1730,24 @@ signed_print:
                fprintf(trace->output, ") = 0 Timeout");
        else if (sc->fmt->hexret)
                fprintf(trace->output, ") = %#lx", ret);
-       else
+       else if (sc->fmt->errpid) {
+               struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+               if (child != NULL) {
+                       fprintf(trace->output, ") = %ld", ret);
+                       if (child->comm_set)
+                               fprintf(trace->output, " (%s)", thread__comm_str(child));
+                       thread__put(child);
+               }
+       } else
                goto signed_print;
 
        fputc('\n', trace->output);
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
        ttrace->entry_pending = false;
        err = 0;
@@ -2217,6 +1878,17 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample)
 {
+       int callchain_ret = 0;
+
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out;
+                       callchain_ret = 1;
+               }
+       }
+
        trace__printf_interrupted_entry(trace, sample);
        trace__fprintf_tstamp(trace, sample->time, trace->output);
 
@@ -2234,6 +1906,12 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
        }
 
        fprintf(trace->output, ")\n");
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
        return 0;
 }
 
@@ -2264,8 +1942,19 @@ static int trace__pgfault(struct trace *trace,
        char map_type = 'd';
        struct thread_trace *ttrace;
        int err = -1;
+       int callchain_ret = 0;
 
        thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+       if (sample->callchain) {
+               callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+               if (callchain_ret == 0) {
+                       if (callchain_cursor.nr < trace->min_stack)
+                               goto out_put;
+                       callchain_ret = 1;
+               }
+       }
+
        ttrace = thread__trace(thread, trace->output);
        if (ttrace == NULL)
                goto out_put;
@@ -2307,6 +1996,11 @@ static int trace__pgfault(struct trace *trace,
        print_location(trace->output, sample, &al, true, false);
 
        fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+       if (callchain_ret > 0)
+               trace__fprintf_callchain(trace, sample);
+       else if (callchain_ret < 0)
+               pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
        err = 0;
 out_put:
@@ -2326,6 +2020,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample)
        return false;
 }
 
+static void trace__set_base_time(struct trace *trace,
+                                struct perf_evsel *evsel,
+                                struct perf_sample *sample)
+{
+       /*
+        * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
+        * and don't use sample->time unconditionally, we may end up having
+        * some other event in the future without PERF_SAMPLE_TIME for good
+        * reason, i.e. we may not be interested in its timestamps, just in
+        * it taking place, picking some piece of information when it
+        * appears in our event stream (vfs_getname comes to mind).
+        */
+       if (trace->base_time == 0 && !trace->full_time &&
+           (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+               trace->base_time = sample->time;
+}
+
 static int trace__process_sample(struct perf_tool *tool,
                                 union perf_event *event,
                                 struct perf_sample *sample,
@@ -2340,8 +2051,7 @@ static int trace__process_sample(struct perf_tool *tool,
        if (skip_sample(trace, sample))
                return 0;
 
-       if (!trace->full_time && trace->base_time == 0)
-               trace->base_time = sample->time;
+       trace__set_base_time(trace, evsel, sample);
 
        if (handler) {
                ++trace->nr_events;
@@ -2450,8 +2160,7 @@ static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
        return true;
 }
 
-static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
-                                   u64 config)
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 {
        struct perf_evsel *evsel;
        struct perf_event_attr attr = {
@@ -2465,13 +2174,10 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
        event_attr_init(&attr);
 
        evsel = perf_evsel__new(&attr);
-       if (!evsel)
-               return -ENOMEM;
-
-       evsel->handler = trace__pgfault;
-       perf_evlist__add(evlist, evsel);
+       if (evsel)
+               evsel->handler = trace__pgfault;
 
-       return 0;
+       return evsel;
 }
 
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
@@ -2479,9 +2185,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
        const u32 type = event->header.type;
        struct perf_evsel *evsel;
 
-       if (!trace->full_time && trace->base_time == 0)
-               trace->base_time = sample->time;
-
        if (type != PERF_RECORD_SAMPLE) {
                trace__process_event(trace, trace->host, event, sample);
                return;
@@ -2493,6 +2196,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
                return;
        }
 
+       trace__set_base_time(trace, evsel, sample);
+
        if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
            sample->raw_data == NULL) {
                fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
@@ -2527,6 +2232,15 @@ static int trace__add_syscall_newtp(struct trace *trace)
        perf_evlist__add(evlist, sys_enter);
        perf_evlist__add(evlist, sys_exit);
 
+       if (callchain_param.enabled && !trace->kernel_syscallchains) {
+               /*
+                * We're interested only in the user space callchain
+                * leading to the syscall, allow overriding that for
+                * debugging reasons using --kernel-syscall-graph
+                */
+               sys_exit->attr.exclude_callchain_kernel = 1;
+       }
+
        trace->syscalls.events.sys_enter = sys_enter;
        trace->syscalls.events.sys_exit  = sys_exit;
 
@@ -2565,7 +2279,7 @@ out_enomem:
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
        struct perf_evlist *evlist = trace->evlist;
-       struct perf_evsel *evsel;
+       struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
        int err = -1, i;
        unsigned long before;
        const bool forks = argc > 0;
@@ -2579,14 +2293,19 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (trace->trace_syscalls)
                trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
 
-       if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
-           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
-               goto out_error_mem;
+       if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+               pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+               if (pgfault_maj == NULL)
+                       goto out_error_mem;
+               perf_evlist__add(evlist, pgfault_maj);
        }
 
-       if ((trace->trace_pgfaults & TRACE_PFMIN) &&
-           perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
-               goto out_error_mem;
+       if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+               pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+               if (pgfault_min == NULL)
+                       goto out_error_mem;
+               perf_evlist__add(evlist, pgfault_min);
+       }
 
        if (trace->sched &&
            perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -2605,7 +2324,45 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                goto out_delete_evlist;
        }
 
-       perf_evlist__config(evlist, &trace->opts);
+       perf_evlist__config(evlist, &trace->opts, NULL);
+
+       if (callchain_param.enabled) {
+               bool use_identifier = false;
+
+               if (trace->syscalls.events.sys_exit) {
+                       perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
+                                                    &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (pgfault_maj) {
+                       perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (pgfault_min) {
+                       perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+                       use_identifier = true;
+               }
+
+               if (use_identifier) {
+                      /*
+                       * Now we have evsels with different sample_ids, use
+                       * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
+                       * from a fixed position in each ring buffer record.
+                       *
+                       * As of the changeset introducing this comment, this
+                       * isn't strictly needed, as the fields that can come before
+                       * PERF_SAMPLE_ID are all used, but we'll probably disable
+                       * some of those for things like copying the payload of
+                       * pointer syscall arguments, and for vfs_getname we don't
+                       * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
+                       * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
+                       */
+                       perf_evlist__set_sample_bit(evlist, IDENTIFIER);
+                       perf_evlist__reset_sample_bit(evlist, ID);
+               }
+       }
 
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
@@ -2883,15 +2640,29 @@ static size_t trace__fprintf_threads_header(FILE *fp)
        return printed;
 }
 
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+       struct stats    *stats;
+       double          msecs;
+       int             syscall;
+)
+{
+       struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+       struct stats *stats = source->priv;
+
+       entry->syscall = source->i;
+       entry->stats   = stats;
+       entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
 static size_t thread__dump_stats(struct thread_trace *ttrace,
                                 struct trace *trace, FILE *fp)
 {
-       struct stats *stats;
        size_t printed = 0;
        struct syscall *sc;
-       struct int_node *inode = intlist__first(ttrace->syscall_stats);
+       struct rb_node *nd;
+       DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
 
-       if (inode == NULL)
+       if (syscall_stats == NULL)
                return 0;
 
        printed += fprintf(fp, "\n");
@@ -2900,9 +2671,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
        printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
        printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
 
-       /* each int_node is a syscall */
-       while (inode) {
-               stats = inode->priv;
+       resort_rb__for_each(nd, syscall_stats) {
+               struct stats *stats = syscall_stats_entry->stats;
                if (stats) {
                        double min = (double)(stats->min) / NSEC_PER_MSEC;
                        double max = (double)(stats->max) / NSEC_PER_MSEC;
@@ -2913,34 +2683,23 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
                        pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
                        avg /= NSEC_PER_MSEC;
 
-                       sc = &trace->syscalls.table[inode->i];
+                       sc = &trace->syscalls.table[syscall_stats_entry->syscall];
                        printed += fprintf(fp, "   %-15s", sc->name);
                        printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
-                                          n, avg * n, min, avg);
+                                          n, syscall_stats_entry->msecs, min, avg);
                        printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
                }
-
-               inode = intlist__next(inode);
        }
 
+       resort_rb__delete(syscall_stats);
        printed += fprintf(fp, "\n\n");
 
        return printed;
 }
 
-/* struct used to pass data to per-thread function */
-struct summary_data {
-       FILE *fp;
-       struct trace *trace;
-       size_t printed;
-};
-
-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
 {
-       struct summary_data *data = priv;
-       FILE *fp = data->fp;
-       size_t printed = data->printed;
-       struct trace *trace = data->trace;
+       size_t printed = 0;
        struct thread_trace *ttrace = thread__priv(thread);
        double ratio;
 
@@ -2956,25 +2715,45 @@ static int trace__fprintf_one_thread(struct thread *thread, void *priv)
                printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
        if (ttrace->pfmin)
                printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
-       printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+       if (trace->sched)
+               printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+       else if (fputc('\n', fp) != EOF)
+               ++printed;
+
        printed += thread__dump_stats(ttrace, trace, fp);
 
-       data->printed += printed;
+       return printed;
+}
 
-       return 0;
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+       return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+       struct thread *thread;
+)
+{
+       entry->thread = rb_entry(nd, struct thread, rb_node);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-       struct summary_data data = {
-               .fp = fp,
-               .trace = trace
-       };
-       data.printed = trace__fprintf_threads_header(fp);
+       DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
+       size_t printed = trace__fprintf_threads_header(fp);
+       struct rb_node *nd;
+
+       if (threads == NULL) {
+               fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+               return 0;
+       }
 
-       machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+       resort_rb__for_each(nd, threads)
+               printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
 
-       return data.printed;
+       resort_rb__delete(threads);
+
+       return printed;
 }
 
 static int trace__set_duration(const struct option *opt, const char *str,
@@ -3070,10 +2849,6 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
        struct trace trace = {
-               .audit = {
-                       .machine = audit_detect_machine(),
-                       .open_id = audit_name_to_syscall("open", trace.audit.machine),
-               },
                .syscalls = {
                        . max = -1,
                },
@@ -3091,6 +2866,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                .output = stderr,
                .show_comm = true,
                .trace_syscalls = true,
+               .kernel_syscallchains = false,
+               .max_stack = UINT_MAX,
        };
        const char *output_name = NULL;
        const char *ev_qualifier_str = NULL;
@@ -3136,10 +2913,24 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Trace pagefaults", parse_pagefaults, "maj"),
        OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
        OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+       OPT_CALLBACK(0, "call-graph", &trace.opts,
+                    "record_mode[,record_size]", record_callchain_help,
+                    &record_parse_callchain_opt),
+       OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+                   "Show the kernel callchains on the syscall exit path"),
+       OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+                    "Set the minimum stack depth when parsing the callchain, "
+                    "anything below the specified depth will be ignored."),
+       OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+                    "Set the maximum stack depth when parsing the callchain, "
+                    "anything beyond the specified depth will be ignored. "
+                    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
        OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
                        "per thread proc mmap processing timeout in ms"),
        OPT_END()
        };
+       bool __maybe_unused max_stack_user_set = true;
+       bool mmap_pages_user_set = true;
        const char * const trace_subcommands[] = { "record", NULL };
        int err;
        char bf[BUFSIZ];
@@ -3148,8 +2939,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        signal(SIGFPE, sighandler_dump_stack);
 
        trace.evlist = perf_evlist__new();
+       trace.sctbl = syscalltbl__new();
 
-       if (trace.evlist == NULL) {
+       if (trace.evlist == NULL || trace.sctbl == NULL) {
                pr_err("Not enough memory to run!\n");
                err = -ENOMEM;
                goto out;
@@ -3158,11 +2950,40 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
        argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
                                 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
+       err = bpf__setup_stdout(trace.evlist);
+       if (err) {
+               bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+               pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+               goto out;
+       }
+
+       err = -1;
+
        if (trace.trace_pgfaults) {
                trace.opts.sample_address = true;
                trace.opts.sample_time = true;
        }
 
+       if (trace.opts.mmap_pages == UINT_MAX)
+               mmap_pages_user_set = false;
+
+       if (trace.max_stack == UINT_MAX) {
+               trace.max_stack = sysctl_perf_event_max_stack;
+               max_stack_user_set = false;
+       }
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+       if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
+               record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+#endif
+
+       if (callchain_param.enabled) {
+               if (!mmap_pages_user_set && geteuid() == 0)
+                       trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+               symbol_conf.use_callchain = true;
+       }
+
        if (trace.evlist->nr_entries > 0)
                evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 
@@ -3179,6 +3000,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                return -1;
        }
 
+       if (!trace.trace_syscalls && ev_qualifier_str) {
+               pr_err("The -e option can't be used with --no-syscalls.\n");
+               goto out;
+       }
+
        if (output_name != NULL) {
                err = trace__open_output(&trace, output_name);
                if (err < 0) {
@@ -3187,6 +3013,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
                }
        }
 
+       trace.open_id = syscalltbl__id(trace.sctbl, "open");
+
        if (ev_qualifier_str != NULL) {
                const char *s = ev_qualifier_str;
                struct strlist_config slist_config = {
index f7d7f5a1cad538e44be9400b520ca32315eed999..1e46277286c2e352417bba9c4b6f358f447a2b23 100644 (file)
@@ -27,7 +27,7 @@ NO_PERF_REGS := 1
 ifeq ($(ARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
-    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
     $(call detected,CONFIG_X86_64)
@@ -268,6 +268,12 @@ else
     ifneq ($(feature-dwarf), 1)
       msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
       NO_DWARF := 1
+    else
+      ifneq ($(feature-dwarf_getlocations), 1)
+        msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
+      else
+        CFLAGS += -DHAVE_DWARF_GETLOCATIONS
+      endif # dwarf_getlocations
     endif # Dwarf support
   endif # libelf support
 endif # NO_LIBELF
@@ -289,9 +295,6 @@ ifndef NO_LIBELF
     CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
   endif
 
-  # include ARCH specific config
-  -include $(src-perf)/arch/$(ARCH)/Makefile
-
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
       msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
index 6461e02ab940d332bdf42dbbe066f4723ee2de02..3573f315f9559cee48cb0cf59530d334c893b3f3 100644 (file)
@@ -92,6 +92,22 @@ error:
        return ret;
 }
 
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+       unsigned int low, high;
+
+       asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+       return low | ((uint64_t)high) << 32;
+#else
+       return 0;
+#endif
+}
+
 #define NSEC_PER_SEC   1000000000
 static int perf_clk_id = CLOCK_MONOTONIC;
 
@@ -107,6 +123,9 @@ perf_get_timestamp(void)
        struct timespec ts;
        int ret;
 
+       if (use_arch_timestamp)
+               return get_arch_timestamp();
+
        ret = clock_gettime(perf_clk_id, &ts);
        if (ret)
                return 0;
@@ -203,6 +222,17 @@ perf_close_marker_file(void)
        munmap(marker_addr, pgsz);
 }
 
+static void
+init_arch_timestamp(void)
+{
+       char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+       if (!str || !*str || !strcmp(str, "0"))
+               return;
+
+       use_arch_timestamp = 1;
+}
+
 void *jvmti_open(void)
 {
        int pad_cnt;
@@ -211,11 +241,17 @@ void *jvmti_open(void)
        int fd;
        FILE *fp;
 
+       init_arch_timestamp();
+
        /*
         * check if clockid is supported
         */
-       if (!perf_get_timestamp())
-               warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+       if (!perf_get_timestamp()) {
+               if (use_arch_timestamp)
+                       warnx("jvmti: arch timestamp not supported");
+               else
+                       warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+       }
 
        memset(&header, 0, sizeof(header));
 
@@ -263,6 +299,9 @@ void *jvmti_open(void)
 
        header.timestamp = perf_get_timestamp();
 
+       if (use_arch_timestamp)
+               header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
        if (!fwrite(&header, sizeof(header), 1, fp)) {
                warn("jvmti: cannot write dumpfile header");
                goto error;
index aaee0a7827477810c5c0d2d82753545592d7f22f..83ffe7cd73301f8990b23c200e135f62aa57cbac 100644 (file)
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>
@@ -533,6 +534,7 @@ int main(int argc, const char **argv)
 {
        const char *cmd;
        char sbuf[STRERR_BUFSIZE];
+       int value;
 
        /* libsubcmd init */
        exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +544,9 @@ int main(int argc, const char **argv)
        page_size = sysconf(_SC_PAGE_SIZE);
        cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+       if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+               sysctl_perf_event_max_stack = value;
+
        cmd = extract_argv0_path(argv[0]);
        if (!cmd)
                cmd = "perf-help";
@@ -549,6 +554,7 @@ int main(int argc, const char **argv)
        srandom(time(NULL));
 
        perf_config(perf_default_config, NULL);
+       set_buildid_dir(NULL);
 
        /* get debugfs/tracefs mount point from /proc/mounts */
        tracing_path_mount();
@@ -572,7 +578,6 @@ int main(int argc, const char **argv)
        }
        if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-               set_buildid_dir(NULL);
                setup_path();
                argv[0] = "trace";
                return cmd_trace(argc, argv, NULL);
@@ -587,7 +592,6 @@ int main(int argc, const char **argv)
        argc--;
        handle_options(&argv, &argc, NULL);
        commit_pager_choice();
-       set_buildid_dir(NULL);
 
        if (argc > 0) {
                if (!prefixcmp(argv[0], "--"))
index 5381a01c0610c0e61f079140ed5cdc2df3f87b0d..cd8f1b150f9ec67ec2e3aef5dd45f6eee7918e89 100644 (file)
@@ -52,7 +52,6 @@ struct record_opts {
        bool         sample_weight;
        bool         sample_time;
        bool         sample_time_set;
-       bool         callgraph_set;
        bool         period;
        bool         running_time;
        bool         full_auxtrace;
index 1b02cdc0cab69b53f0aa97ed544d6383fba56d3d..7656ff8aa066ad7f202bb2637c9f4707422894f7 100644 (file)
@@ -34,10 +34,9 @@ import datetime
 #
 # ubuntu:
 #
-#      $ sudo apt-get install postgresql
+#      $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
 #      $ sudo su - postgres
-#      $ createuser <your user id here>
-#      Shall the new role be a superuser? (y/n) y
+#      $ createuser -s <your user id here>
 #
 # An example of using this script with Intel PT:
 #
@@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
 perf_db_export_mode = True
 perf_db_export_calls = False
+perf_db_export_callchains = False
+
 
 def usage():
-       print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+       print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
        print >> sys.stderr, "where:    columns         'all' or 'branches'"
-       print >> sys.stderr, "          calls           'calls' => create calls table"
+       print >> sys.stderr, "          calls           'calls' => create calls and call_paths table"
+       print >> sys.stderr, "          callchains      'callchains' => create call_paths table"
        raise Exception("Too few arguments")
 
 if (len(sys.argv) < 2):
@@ -246,9 +248,11 @@ if columns not in ("all", "branches"):
 
 branches = (columns == "branches")
 
-if (len(sys.argv) >= 4):
-       if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+       if (sys.argv[i] == "calls"):
                perf_db_export_calls = True
+       elif (sys.argv[i] == "callchains"):
+               perf_db_export_callchains = True
        else:
                usage()
 
@@ -359,14 +363,16 @@ else:
                'transaction    bigint,'
                'data_src       bigint,'
                'branch_type    integer,'
-               'in_tx          boolean)')
+               'in_tx          boolean,'
+               'call_path_id   bigint)')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
        do_query(query, 'CREATE TABLE call_paths ('
                'id             bigint          NOT NULL,'
                'parent_id      bigint,'
                'symbol_id      bigint,'
                'ip             bigint)')
+if perf_db_export_calls:
        do_query(query, 'CREATE TABLE calls ('
                'id             bigint          NOT NULL,'
                'thread_id      bigint,'
@@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
                '(SELECT tid FROM threads WHERE id = thread_id) AS tid'
        ' FROM comm_threads')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
        do_query(query, 'CREATE VIEW call_paths_view AS '
                'SELECT '
                        'c.id,'
@@ -444,6 +450,7 @@ if perf_db_export_calls:
                        '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
                        '(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
                ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
        do_query(query, 'CREATE VIEW calls_view AS '
                'SELECT '
                        'calls.id,'
@@ -541,8 +548,9 @@ dso_file            = open_output_file("dso_table.bin")
 symbol_file            = open_output_file("symbol_table.bin")
 branch_type_file       = open_output_file("branch_type_table.bin")
 sample_file            = open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
        call_path_file          = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
        call_file               = open_output_file("call_table.bin")
 
 def trace_begin():
@@ -554,8 +562,8 @@ def trace_begin():
        comm_table(0, "unknown")
        dso_table(0, 0, "unknown", "unknown", "")
        symbol_table(0, 0, 0, 0, 0, "unknown")
-       sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-       if perf_db_export_calls:
+       sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+       if perf_db_export_calls or perf_db_export_callchains:
                call_path_table(0, 0, 0, 0)
 
 unhandled_count = 0
@@ -571,8 +579,9 @@ def trace_end():
        copy_output_file(symbol_file,           "symbols")
        copy_output_file(branch_type_file,      "branch_types")
        copy_output_file(sample_file,           "samples")
-       if perf_db_export_calls:
+       if perf_db_export_calls or perf_db_export_callchains:
                copy_output_file(call_path_file,        "call_paths")
+       if perf_db_export_calls:
                copy_output_file(call_file,             "calls")
 
        print datetime.datetime.today(), "Removing intermediate files..."
@@ -585,8 +594,9 @@ def trace_end():
        remove_output_file(symbol_file)
        remove_output_file(branch_type_file)
        remove_output_file(sample_file)
-       if perf_db_export_calls:
+       if perf_db_export_calls or perf_db_export_callchains:
                remove_output_file(call_path_file)
+       if perf_db_export_calls:
                remove_output_file(call_file)
        os.rmdir(output_dir_name)
        print datetime.datetime.today(), "Adding primary keys"
@@ -599,8 +609,9 @@ def trace_end():
        do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
        do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
        do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
-       if perf_db_export_calls:
+       if perf_db_export_calls or perf_db_export_callchains:
                do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
+       if perf_db_export_calls:
                do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
 
        print datetime.datetime.today(), "Adding foreign keys"
@@ -623,10 +634,11 @@ def trace_end():
                                        'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
                                        'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
                                        'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
-       if perf_db_export_calls:
+       if perf_db_export_calls or perf_db_export_callchains:
                do_query(query, 'ALTER TABLE call_paths '
                                        'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
                                        'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
+       if perf_db_export_calls:
                do_query(query, 'ALTER TABLE calls '
                                        'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
                                        'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
@@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x):
        value = struct.pack(fmt, 2, 4, branch_type, n, name)
        branch_type_file.write(value)
 
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
        if branches:
-               value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+               value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
        else:
-               value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+               value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
        sample_file.write(value)
 
 def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
index 1ba628ed049adbafc27c7b8900ecb838165a2aa7..449fe97a555f7e4f32ebb79452001da9b51b06d3 100644 (file)
@@ -37,6 +37,7 @@ perf-y += topology.o
 perf-y += cpumap.o
 perf-y += stat.o
 perf-y += event_update.o
+perf-y += event-times.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
        $(call rule_mkdir)
index 199501c71e272491850065910aae5003603ab10e..f31eed31c1a9cff8e8307a1f06c3013329871ec6 100644 (file)
@@ -138,7 +138,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        perf_evlist__splice_list_tail(evlist, &parse_evlist.list);
        evlist->nr_groups = parse_evlist.nr_groups;
 
-       perf_evlist__config(evlist, &opts);
+       perf_evlist__config(evlist, &opts, NULL);
 
        err = perf_evlist__open(evlist);
        if (err < 0) {
index f2b1dcac45d3065d90ef6fb01bdd57694577d809..93c467015e711e1ac563039c84a7916060d1440b 100644 (file)
@@ -203,6 +203,10 @@ static struct test generic_tests[] = {
                .desc = "Test attr update synthesize",
                .func = test__event_update,
        },
+       {
+               .desc = "Test events times",
+               .func = test__event_times,
+       },
        {
                .func = NULL,
        },
index abd3f0ec0c0b8e61c6371df1077e1fb2240e068e..68a69a195545e16bfd0913f1da56c88131734f5f 100644 (file)
@@ -532,7 +532,7 @@ static int do_test_code_reading(bool try_kcore)
                        goto out_put;
                }
 
-               perf_evlist__config(evlist, &opts);
+               perf_evlist__config(evlist, &opts, NULL);
 
                evsel = perf_evlist__first(evlist);
 
index dc673ff7c43756503ca095184f44bd2daa78b50e..8cf0d9e189a8bea36c02c965d74e216d29409089 100644 (file)
@@ -202,7 +202,7 @@ static int dsos__create(int cnt, int size)
 {
        int i;
 
-       dsos = malloc(sizeof(dsos) * cnt);
+       dsos = malloc(sizeof(*dsos) * cnt);
        TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
 
        for (i = 0; i < cnt; i++) {
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
new file mode 100644 (file)
index 0000000..95fb744
--- /dev/null
@@ -0,0 +1,236 @@
+#include <linux/compiler.h>
+#include <string.h>
+#include "tests.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+#include "debug.h"
+#include "thread_map.h"
+#include "target.h"
+
+/*
+ * Prepare a "true" workload, flag the last event of the list as
+ * enable_on_exec, open the list and start the workload.
+ *
+ * Returns TEST_OK when perf_evlist__start_workload() returns 1 and
+ * TEST_FAIL when it returns anything else; a negative result from any of
+ * the setup steps is returned as is.
+ */
+static int attach__enable_on_exec(struct perf_evlist *evlist)
+{
+       struct perf_evsel *last = perf_evlist__last(evlist);
+       struct target target = {
+               .uid = UINT_MAX,
+       };
+       const char *argv[] = { "true", NULL, };
+       char sbuf[STRERR_BUFSIZE];
+       int ret;
+
+       pr_debug("attaching to spawned child, enable on exec\n");
+
+       ret = perf_evlist__create_maps(evlist, &target);
+       if (ret < 0) {
+               pr_debug("Not enough memory to create thread/cpu maps\n");
+               return ret;
+       }
+
+       ret = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+       if (ret < 0) {
+               pr_debug("Couldn't run the workload!\n");
+               return ret;
+       }
+
+       /* Must be set before the event is opened below. */
+       last->attr.enable_on_exec = 1;
+
+       ret = perf_evlist__open(evlist);
+       if (ret < 0) {
+               pr_debug("perf_evlist__open: %s\n",
+                        strerror_r(errno, sbuf, sizeof(sbuf)));
+               return ret;
+       }
+
+       if (perf_evlist__start_workload(evlist) != 1)
+               return TEST_FAIL;
+
+       return TEST_OK;
+}
+
+/*
+ * Wait for the process recorded in evlist->workload.pid.  The wait result
+ * is deliberately ignored; this step always returns 0.
+ */
+static int detach__enable_on_exec(struct perf_evlist *evlist)
+{
+       (void)waitpid(evlist->workload.pid, NULL, 0);
+
+       return 0;
+}
+
+/*
+ * Open the last event of the list on the calling process with
+ * attr.disabled set, then enable it explicitly.
+ *
+ * Returns TEST_OK when the event was opened and enabled, TEST_FAIL when
+ * enabling failed, -1 when the thread map could not be created, or the
+ * error returned by perf_evsel__open_per_thread().
+ */
+static int attach__current_disabled(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__last(evlist);
+       struct thread_map *threads;
+       int err;
+
+       pr_debug("attaching to current thread as disabled\n");
+
+       threads = thread_map__new(-1, getpid(), UINT_MAX);
+       if (threads == NULL) {
+               pr_debug("thread_map__new\n");
+               return -1;
+       }
+
+       evsel->attr.disabled = 1;
+
+       err = perf_evsel__open_per_thread(evsel, threads);
+       /*
+        * Drop our reference before looking at the result so that the
+        * failure path does not leak the map.
+        */
+       thread_map__put(threads);
+       if (err) {
+               pr_debug("Failed to open event cpu-clock:u\n");
+               return err;
+       }
+
+       return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * Open the last event of the list on the calling process without touching
+ * attr.disabled.
+ *
+ * Returns TEST_OK when the open succeeded, TEST_FAIL when it did not, and
+ * -1 when the thread map could not be created.
+ */
+static int attach__current_enabled(struct perf_evlist *evlist)
+{
+       struct perf_evsel *last = perf_evlist__last(evlist);
+       struct thread_map *self_map;
+       int rc;
+
+       pr_debug("attaching to current thread as enabled\n");
+
+       self_map = thread_map__new(-1, getpid(), UINT_MAX);
+       if (!self_map) {
+               pr_debug("failed to call thread_map__new\n");
+               return -1;
+       }
+
+       rc = perf_evsel__open_per_thread(last, self_map);
+       thread_map__put(self_map);
+
+       if (rc != 0)
+               return TEST_FAIL;
+
+       return TEST_OK;
+}
+
+/*
+ * Detach step for the cases that attach to the current process or to a
+ * CPU; returns the result of perf_evsel__enable() on the last event.
+ *
+ * NOTE(review): despite its name this calls perf_evsel__enable(), not a
+ * disable routine -- confirm that this is intended.
+ */
+static int detach__disable(struct perf_evlist *evlist)
+{
+       return perf_evsel__enable(perf_evlist__last(evlist));
+}
+
+/*
+ * Open the last event of the list on CPU 0 with attr.disabled set, then
+ * enable it explicitly.
+ *
+ * Returns TEST_SKIP when the open fails with -EACCES, the open error for
+ * any other open failure, -1 when the cpu map could not be created, and
+ * otherwise the result of perf_evsel__enable().
+ */
+static int attach__cpu_disabled(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__last(evlist);
+       struct cpu_map *cpus;
+       int err;
+
+       pr_debug("attaching to CPU 0 as disabled\n");
+
+       cpus = cpu_map__new("0");
+       if (cpus == NULL) {
+               pr_debug("failed to call cpu_map__new\n");
+               return -1;
+       }
+
+       evsel->attr.disabled = 1;
+
+       err = perf_evsel__open_per_cpu(evsel, cpus);
+       /*
+        * Drop our reference before looking at the result so that neither
+        * failure path leaks the map.
+        */
+       cpu_map__put(cpus);
+       if (err) {
+               if (err == -EACCES)
+                       return TEST_SKIP;
+
+               pr_debug("Failed to open event cpu-clock:u\n");
+               return err;
+       }
+
+       return perf_evsel__enable(evsel);
+}
+
+/*
+ * Open the last event of the list on CPU 0 without touching
+ * attr.disabled.
+ *
+ * Returns TEST_OK when the open succeeded, TEST_SKIP when it failed with
+ * -EACCES, TEST_FAIL for any other open failure, and -1 when the cpu map
+ * could not be created.
+ */
+static int attach__cpu_enabled(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__last(evlist);
+       struct cpu_map *cpus;
+       int err;
+
+       pr_debug("attaching to CPU 0 as enabled\n");
+
+       cpus = cpu_map__new("0");
+       if (cpus == NULL) {
+               pr_debug("failed to call cpu_map__new\n");
+               return -1;
+       }
+
+       err = perf_evsel__open_per_cpu(evsel, cpus);
+       /* Release the map first so the -EACCES return does not leak it. */
+       cpu_map__put(cpus);
+       if (err == -EACCES)
+               return TEST_SKIP;
+
+       return err ? TEST_FAIL : TEST_OK;
+}
+
+/*
+ * Create a 'cpu-clock:u' event that also reports its total enabled and
+ * running times, attach it with @attach, spin for a while, detach it with
+ * @detach and compare the two times read back from the event.
+ *
+ * Returns TEST_OK when both times are equal, TEST_SKIP when @attach
+ * reports TEST_SKIP, and TEST_FAIL on any other failure.  The event list
+ * is released on every exit path once it has been created.
+ */
+static int test_times(int (attach)(struct perf_evlist *),
+                     int (detach)(struct perf_evlist *))
+{
+       struct perf_counts_values count;
+       struct perf_evlist *evlist;
+       struct perf_evsel *evsel;
+       int ret = TEST_FAIL;
+       int err, i;
+
+       evlist = perf_evlist__new();
+       if (!evlist) {
+               pr_debug("failed to create event list\n");
+               return TEST_FAIL;
+       }
+
+       err = parse_events(evlist, "cpu-clock:u", NULL);
+       if (err) {
+               pr_debug("failed to parse event cpu-clock:u\n");
+               goto out_delete;
+       }
+
+       evsel = perf_evlist__last(evlist);
+       evsel->attr.read_format |=
+               PERF_FORMAT_TOTAL_TIME_ENABLED |
+               PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+       err = attach(evlist);
+       if (err == TEST_SKIP) {
+               pr_debug("  SKIP  : not enough rights\n");
+               ret = TEST_SKIP;
+               goto out_delete;
+       }
+       if (err) {
+               pr_debug("failed to attach\n");
+               goto out_delete;
+       }
+
+       /* Empty busy loop between the attach and detach steps. */
+       for (i = 0; i < 100000000; i++) { }
+
+       if (detach(evlist)) {
+               pr_debug("failed to detach\n");
+               goto out_delete;
+       }
+
+       /*
+        * Do not compare the times when the read itself failed.
+        * NOTE(review): assumes a negative return signals failure --
+        * confirm against perf_evsel__read().
+        */
+       if (perf_evsel__read(evsel, 0, 0, &count) < 0) {
+               pr_debug("failed to read event cpu-clock:u\n");
+               goto out_delete;
+       }
+
+       err = !(count.ena == count.run);
+
+       pr_debug("  %s: ena %" PRIu64", run %" PRIu64"\n",
+                !err ? "OK    " : "FAILED",
+                count.ena, count.run);
+
+       if (!err)
+               ret = TEST_OK;
+
+out_delete:
+       perf_evlist__delete(evlist);
+       return ret;
+}
+
+/*
+ * Creates the software event 'cpu-clock', attaches it in each of the ways
+ * listed in the table below and checks that the enabled and running times
+ * match.
+ *
+ * A non-zero result from a case replaces the overall result only while
+ * the overall result is still TEST_OK or TEST_SKIP.
+ */
+int test__event_times(int subtest __maybe_unused)
+{
+       static const struct {
+               int (*attach)(struct perf_evlist *);
+               int (*detach)(struct perf_evlist *);
+       } cases[] = {
+               /* attach on newly spawned process after exec */
+               { attach__enable_on_exec,   detach__enable_on_exec },
+               /* attach on current process as enabled */
+               { attach__current_enabled,  detach__disable },
+               /* attach on current process as disabled */
+               { attach__current_disabled, detach__disable },
+               /* attach on cpu as disabled */
+               { attach__cpu_disabled,     detach__disable },
+               /* attach on cpu as enabled */
+               { attach__cpu_enabled,      detach__disable },
+       };
+       int ret = 0;
+       size_t i;
+
+       for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+               int err = test_times(cases[i].attach, cases[i].detach);
+
+               if (err && (ret == TEST_OK || ret == TEST_SKIP))
+                       ret = err;
+       }
+
+       return ret;
+}
index 012eab5d1df115e7cbeb7d8a505a46d0e43f854a..63ecf21750eb0dca6e865cb90a57905df1fb566e 100644 (file)
@@ -30,7 +30,7 @@ static int process_event_scale(struct perf_tool *tool __maybe_unused,
 
        TEST_ASSERT_VAL("wrong id", ev->id == 123);
        TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
-       TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+       TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
        return 0;
 }
 
index f55f4bd47932dbaaf325a6732d0daa7a12a2358b..6b21746d6eec84b0bf443fc4cd05523be57f9878 100644 (file)
@@ -161,7 +161,7 @@ void print_hists_in(struct hists *hists)
        struct rb_root *root;
        struct rb_node *node;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
index ed5aa9eaeb6cf51d113e75620737641737a85fb6..a9e3db3afac423a89f589ee995433278172b019e 100644 (file)
@@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
@@ -126,7 +126,7 @@ static void del_hist_entries(struct hists *hists)
        struct rb_root *root_out;
        struct rb_node *node;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root_in = &hists->entries_collapsed;
        else
                root_in = hists->entries_in;
index b825d24f81866b8756903ce8975beb74e932e326..e846f8c420136426fae224ce658946923b126f83 100644 (file)
@@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
 
                        al.socket = fake_samples[i].socket;
                        if (hist_entry_iter__add(&iter, &al,
-                                                PERF_MAX_STACK_DEPTH, NULL) < 0) {
+                                                sysctl_perf_event_max_stack, NULL) < 0) {
                                addr_location__put(&al);
                                goto out;
                        }
index 358324e47805e7f9a5210a70a6bfb99371dacd04..acf5a1301c0771eafaee8775e10ad470fd0d6b07 100644 (file)
@@ -145,7 +145,7 @@ static int __validate_match(struct hists *hists)
        /*
         * Only entries from fake_common_samples should have a pair.
         */
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
@@ -197,7 +197,7 @@ static int __validate_link(struct hists *hists, int idx)
         * and some entries will have no pair.  However every entry
         * in other hists should have (dummy) pair.
         */
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
index d3556fbe8c5caeaa0dee938615417c64346e8551..63c5efaba1b5c611d3bb96da433d93415b3b87ca 100644 (file)
@@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
                if (machine__resolve(machine, &al, &sample) < 0)
                        goto out;
 
-               if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+               if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
                                         NULL) < 0) {
                        addr_location__put(&al);
                        goto out;
@@ -92,7 +92,7 @@ static void del_hist_entries(struct hists *hists)
        struct rb_root *root_out;
        struct rb_node *node;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root_in = &hists->entries_collapsed;
        else
                root_in = hists->entries_in;
index ddb78fae064a50ca1d43476b29c7b5ff7ee2e7b3..614e45a3c6038099fd9fe6b838db0f1988d20855 100644 (file)
@@ -80,7 +80,7 @@ int test__keep_tracking(int subtest __maybe_unused)
        CHECK__(parse_events(evlist, "dummy:u", NULL));
        CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-       perf_evlist__config(evlist, &opts);
+       perf_evlist__config(evlist, &opts, NULL);
 
        evsel = perf_evlist__first(evlist);
 
index eb99a105f31ce60b6d16123c92524e2f436eb71b..4344fe482c1d2f9a9dec676e236d3200e372e78f 100644 (file)
@@ -44,7 +44,7 @@ int test__syscall_openat_tp_fields(int subtest __maybe_unused)
                goto out_delete_evlist;
        }
 
-       perf_evsel__config(evsel, &opts);
+       perf_evsel__config(evsel, &opts, NULL);
 
        thread_map__set_pid(evlist->threads, 0, getpid());
 
index 1cc78cefe3990906d8195c43c4d6971e630ddc42..b836ee6a8d9bb6a676f65bcfa5243fef840c73e0 100644 (file)
@@ -99,7 +99,7 @@ int test__PERF_RECORD(int subtest __maybe_unused)
        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TID);
        perf_evsel__set_sample_bit(evsel, TIME);
-       perf_evlist__config(evlist, &opts);
+       perf_evlist__config(evlist, &opts, NULL);
 
        err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
        if (err < 0) {
index ebd80168d51e853a29e0f34d93e4dad1812b417b..39a689bf7574e13010a59bf8dae7b748aa0b63c8 100644 (file)
@@ -417,7 +417,7 @@ int test__switch_tracking(int subtest __maybe_unused)
        perf_evsel__set_sample_bit(tracking_evsel, TIME);
 
        /* Config events */
-       perf_evlist__config(evlist, &opts);
+       perf_evlist__config(evlist, &opts, NULL);
 
        /* Check moved event is still at the front */
        if (cycles_evsel != perf_evlist__first(evlist)) {
index 82b2b5e6ba7c7613ca58f9a46cda688b36444915..0fc946989cf0f5fa747f51dee0eb28e438ce5b37 100644 (file)
@@ -85,6 +85,7 @@ int test__synthesize_stat_config(int subtest);
 int test__synthesize_stat(int subtest);
 int test__synthesize_stat_round(int subtest);
 int test__event_update(int subtest);
+int test__event_times(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
index 630b0b409b973f87ba00312e5e4e3d8fdf829f32..e63abab7d5a17c1f283b3a90a68e19918edf420d 100644 (file)
@@ -54,8 +54,14 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
         * Step 3:
         *
         * Load and split /proc/kallsyms into multiple maps, one per module.
+        * Do not use kcore, as this test was designed before kcore support
+        * and has parts that only make sense if using the non-kcore code.
+        * XXX: extend it to stress the kcore code as well, hint: the list
+        * of modules extracted from /proc/kcore, in its current form, can't
+        * be compacted against the list of modules found in the "vmlinux"
+        * code and with the one got from /proc/modules from the "kallsyms" code.
         */
-       if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) {
+       if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
                pr_debug("dso__load_kallsyms ");
                goto out;
        }
@@ -157,6 +163,9 @@ next_pair:
 
                                        pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
                                                 mem_start, sym->name, pair->name);
+                               } else {
+                                       pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+                                                mem_start, sym->name, first_pair->name);
                                }
                        }
                } else
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
new file mode 100644 (file)
index 0000000..d64f4a9
--- /dev/null
@@ -0,0 +1,38 @@
+#include <sys/eventfd.h>
+
+#ifndef EFD_SEMAPHORE
+#define EFD_SEMAPHORE          1
+#endif
+
+#ifndef EFD_NONBLOCK
+#define EFD_NONBLOCK           00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+#define EFD_CLOEXEC            02000000
+#endif
+
+/*
+ * Format the eventfd flags in arg->val into @bf: "NONE" for zero,
+ * otherwise the names of the known EFD_* bits joined by '|', followed by
+ * any remaining bits in hex.  Returns the accumulated scnprintf() results.
+ */
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+       static const struct {
+               int         bit;
+               const char *name;
+       } known[] = {
+               { EFD_SEMAPHORE, "SEMAPHORE" },
+               { EFD_CLOEXEC,   "CLOEXEC"   },
+               { EFD_NONBLOCK,  "NONBLOCK"  },
+       };
+       int printed = 0, flags = arg->val;
+       size_t i;
+
+       if (flags == 0)
+               return scnprintf(bf, size, "NONE");
+
+       for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+               if (!(flags & known[i].bit))
+                       continue;
+
+               printed += scnprintf(bf + printed, size - printed, "%s%s",
+                                    printed ? "|" : "", known[i].name);
+               /* Clear the bit so that only unknown ones are left below. */
+               flags &= ~known[i].bit;
+       }
+
+       if (flags)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
new file mode 100644 (file)
index 0000000..e247621
--- /dev/null
@@ -0,0 +1,44 @@
+#include <linux/futex.h>
+
+/*
+ * Format the futex 'op' argument in arg->val into @bf: the command name,
+ * or the command value in hex when it is not one of the cases below,
+ * followed by "|PRIV" and/or "|CLKRT" when the corresponding option bits
+ * are set.
+ *
+ * As a side effect SCF_* bits are or-ed into arg->mask depending on the
+ * command.  NOTE(review): presumably these flag the syscall arguments the
+ * command does not use -- confirm against the consumer of arg->mask.
+ *
+ * Returns the accumulated scnprintf() results.
+ */
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+       enum syscall_futex_args {
+               SCF_UADDR   = (1 << 0),
+               SCF_OP      = (1 << 1),
+               SCF_VAL     = (1 << 2),
+               SCF_TIMEOUT = (1 << 3),
+               SCF_UADDR2  = (1 << 4),
+               SCF_VAL3    = (1 << 5),
+       };
+       int op = arg->val;
+       int cmd = op & FUTEX_CMD_MASK;
+       size_t printed = 0;
+
+       switch (cmd) {
+       /* Opens a case label and prints the name; each use below supplies the mask update and the break. */
+#define        P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+       P_FUTEX_OP(WAIT);           arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
+       P_FUTEX_OP(WAKE);           arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+       P_FUTEX_OP(FD);             arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+       P_FUTEX_OP(REQUEUE);        arg->mask |= SCF_VAL3|SCF_TIMEOUT;            break;
+       P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;                     break;
+       P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;                     break;
+       P_FUTEX_OP(WAKE_OP);                                                      break;
+       P_FUTEX_OP(LOCK_PI);        arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+       P_FUTEX_OP(UNLOCK_PI);      arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+       P_FUTEX_OP(TRYLOCK_PI);     arg->mask |= SCF_VAL3|SCF_UADDR2;             break;
+       P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;                      break;
+       P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;                      break;
+       P_FUTEX_OP(WAIT_REQUEUE_PI);                                              break;
+       default: printed = scnprintf(bf, size, "%#x", cmd);                       break;
+       }
+
+       /* The option bits are tested on the full op, not on the masked cmd. */
+       if (op & FUTEX_PRIVATE_FLAG)
+               printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+       if (op & FUTEX_CLOCK_REALTIME)
+               printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+       return printed;
+}
+
+#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
new file mode 100644 (file)
index 0000000..3444a4d
--- /dev/null
@@ -0,0 +1,175 @@
+#include <sys/mman.h>
+
+/*
+ * Print the protection bits in arg->val as '|' separated PROT_* names (prefix
+ * stripped); any bits left unnamed are appended in hex. PROT_NONE is printed
+ * as "NONE". Returns the accumulated scnprintf() results.
+ */
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+                                              struct syscall_arg *arg)
+{
+       int printed = 0, prot = arg->val;
+
+       if (prot == PROT_NONE)
+               return scnprintf(bf, size, "NONE");
+#define        P_MMAP_PROT(n) \
+       if (prot & PROT_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               prot &= ~PROT_##n; \
+       }
+
+       P_MMAP_PROT(EXEC);
+       P_MMAP_PROT(READ);
+       P_MMAP_PROT(WRITE);
+#ifdef PROT_SEM
+       P_MMAP_PROT(SEM);
+#endif
+       P_MMAP_PROT(GROWSDOWN);
+       P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+
+       if (prot)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+       return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
+#ifndef MAP_STACK
+# define MAP_STACK             0x20000
+#endif
+
+/*
+ * Print the mapping flags in arg->val as '|' separated MAP_* names (prefix
+ * stripped); any bits left unnamed are appended in hex.
+ */
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+                                               struct syscall_arg *arg)
+{
+       int printed = 0, flags = arg->val;
+
+#define        P_MMAP_FLAG(n) \
+       if (flags & MAP_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               flags &= ~MAP_##n; \
+       }
+
+       P_MMAP_FLAG(SHARED);
+       P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
+       P_MMAP_FLAG(32BIT);
+#endif
+       P_MMAP_FLAG(ANONYMOUS);
+       P_MMAP_FLAG(DENYWRITE);
+       P_MMAP_FLAG(EXECUTABLE);
+       P_MMAP_FLAG(FILE);
+       P_MMAP_FLAG(FIXED);
+       P_MMAP_FLAG(GROWSDOWN);
+#ifdef MAP_HUGETLB
+       P_MMAP_FLAG(HUGETLB);
+#endif
+       P_MMAP_FLAG(LOCKED);
+       P_MMAP_FLAG(NONBLOCK);
+       P_MMAP_FLAG(NORESERVE);
+       P_MMAP_FLAG(POPULATE);
+       P_MMAP_FLAG(STACK);
+#ifdef MAP_UNINITIALIZED
+       P_MMAP_FLAG(UNINITIALIZED);
+#endif
+#undef P_MMAP_FLAG
+
+       if (flags)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
+/*
+ * Print the remap flags in arg->val as '|' separated MREMAP_* names (prefix
+ * stripped); any bits left unnamed are appended in hex.
+ */
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
+                                                 struct syscall_arg *arg)
+{
+       int printed = 0, flags = arg->val;
+
+#define P_MREMAP_FLAG(n) \
+       if (flags & MREMAP_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               flags &= ~MREMAP_##n; \
+       }
+
+       P_MREMAP_FLAG(MAYMOVE);
+#ifdef MREMAP_FIXED
+       P_MREMAP_FLAG(FIXED);
+#endif
+#undef P_MREMAP_FLAG
+
+       if (flags)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed;
+}
+
+#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+
+#ifndef MADV_HWPOISON
+#define MADV_HWPOISON          100
+#endif
+
+#ifndef MADV_MERGEABLE
+#define MADV_MERGEABLE          12
+#endif
+
+#ifndef MADV_UNMERGEABLE
+#define MADV_UNMERGEABLE        13
+#endif
+
+/*
+ * Print the madvise behavior in arg->val as its MADV_* name (prefix
+ * stripped), falling back to hex for values not listed here.
+ */
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+                                                     struct syscall_arg *arg)
+{
+       int behavior = arg->val;
+
+       switch (behavior) {
+#define        P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
+       P_MADV_BHV(NORMAL);
+       P_MADV_BHV(RANDOM);
+       P_MADV_BHV(SEQUENTIAL);
+       P_MADV_BHV(WILLNEED);
+       P_MADV_BHV(DONTNEED);
+       P_MADV_BHV(REMOVE);
+       P_MADV_BHV(DONTFORK);
+       P_MADV_BHV(DOFORK);
+       P_MADV_BHV(HWPOISON);
+#ifdef MADV_SOFT_OFFLINE
+       P_MADV_BHV(SOFT_OFFLINE);
+#endif
+       P_MADV_BHV(MERGEABLE);
+       P_MADV_BHV(UNMERGEABLE);
+#ifdef MADV_HUGEPAGE
+       P_MADV_BHV(HUGEPAGE);
+#endif
+#ifdef MADV_NOHUGEPAGE
+       P_MADV_BHV(NOHUGEPAGE);
+#endif
+#ifdef MADV_DONTDUMP
+       P_MADV_BHV(DONTDUMP);
+#endif
+#ifdef MADV_DODUMP
+       P_MADV_BHV(DODUMP);
+#endif
+#undef P_MADV_BHV
+       default: break;
+       }
+
+       return scnprintf(bf, size, "%#x", behavior);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
new file mode 100644 (file)
index 0000000..930d8fe
--- /dev/null
@@ -0,0 +1,62 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* From include/linux/stat.h */
+#ifndef S_IRWXUGO
+#define S_IRWXUGO      (S_IRWXU|S_IRWXG|S_IRWXO)
+#endif
+#ifndef S_IALLUGO
+#define S_IALLUGO      (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#endif
+#ifndef S_IRUGO
+#define S_IRUGO         (S_IRUSR|S_IRGRP|S_IROTH)
+#endif
+#ifndef S_IWUGO
+#define S_IWUGO         (S_IWUSR|S_IWGRP|S_IWOTH)
+#endif
+#ifndef S_IXUGO
+#define S_IXUGO         (S_IXUSR|S_IXGRP|S_IXOTH)
+#endif
+
+/*
+ * Decode a mode value: each S_* mask below that is fully contained in the
+ * bits still left is printed (prefix stripped, '|' separated) and cleared,
+ * in table order; whatever remains afterwards is appended in hex.
+ */
+static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg)
+{
+#define MODE_ENTRY(n) { S_##n, #n }
+       static const struct {
+               int         bits;
+               const char *name;
+       } modes[] = {
+               MODE_ENTRY(IALLUGO), MODE_ENTRY(IRWXUGO), MODE_ENTRY(IRUGO),
+               MODE_ENTRY(IWUGO), MODE_ENTRY(IXUGO), MODE_ENTRY(IFMT),
+               MODE_ENTRY(IFSOCK), MODE_ENTRY(IFLNK), MODE_ENTRY(IFREG),
+               MODE_ENTRY(IFBLK), MODE_ENTRY(IFDIR), MODE_ENTRY(IFCHR),
+               MODE_ENTRY(IFIFO), MODE_ENTRY(ISUID), MODE_ENTRY(ISGID),
+               MODE_ENTRY(ISVTX), MODE_ENTRY(IRWXU), MODE_ENTRY(IRUSR),
+               MODE_ENTRY(IWUSR), MODE_ENTRY(IXUSR), MODE_ENTRY(IRWXG),
+               MODE_ENTRY(IRGRP), MODE_ENTRY(IWGRP), MODE_ENTRY(IXGRP),
+               MODE_ENTRY(IRWXO), MODE_ENTRY(IROTH), MODE_ENTRY(IWOTH),
+               MODE_ENTRY(IXOTH),
+       };
+#undef MODE_ENTRY
+       int len = 0, left = arg->val;
+       size_t i;
+
+       for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++) {
+               if ((left & modes[i].bits) != modes[i].bits)
+                       continue;
+               len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", modes[i].name);
+               left &= ~modes[i].bits;
+       }
+
+       if (left)
+               len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", left);
+
+       return len;
+}
+
+#define SCA_MODE_T syscall_arg__scnprintf_mode_t
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
new file mode 100644 (file)
index 0000000..07fa8a0
--- /dev/null
@@ -0,0 +1,61 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef MSG_PROBE
+#define MSG_PROBE                   0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE            0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST      0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN           0x20000000
+#endif
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC      0x40000000
+#endif
+
+/*
+ * Decode a MSG_* flags value: "NONE" for zero, otherwise the names (prefix
+ * stripped) of the bits that are set, '|' separated and in table order,
+ * followed by any remaining unnamed bits in hex.
+ */
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+                                              struct syscall_arg *arg)
+{
+#define MSG_ENTRY(n) { MSG_##n, #n }
+       static const struct {
+               int         bit;
+               const char *name;
+       } msg_flags[] = {
+               MSG_ENTRY(OOB), MSG_ENTRY(PEEK), MSG_ENTRY(DONTROUTE),
+               MSG_ENTRY(TRYHARD), MSG_ENTRY(CTRUNC), MSG_ENTRY(PROBE),
+               MSG_ENTRY(TRUNC), MSG_ENTRY(DONTWAIT), MSG_ENTRY(EOR),
+               MSG_ENTRY(WAITALL), MSG_ENTRY(FIN), MSG_ENTRY(SYN),
+               MSG_ENTRY(CONFIRM), MSG_ENTRY(RST), MSG_ENTRY(ERRQUEUE),
+               MSG_ENTRY(NOSIGNAL), MSG_ENTRY(MORE), MSG_ENTRY(WAITFORONE),
+               MSG_ENTRY(SENDPAGE_NOTLAST), MSG_ENTRY(FASTOPEN), MSG_ENTRY(CMSG_CLOEXEC),
+       };
+#undef MSG_ENTRY
+       int len = 0, left = arg->val;
+       size_t i;
+
+       if (left == 0)
+               return scnprintf(bf, size, "NONE");
+
+       for (i = 0; i < sizeof(msg_flags) / sizeof(msg_flags[0]); i++) {
+               if (!(left & msg_flags[i].bit))
+                       continue;
+               len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", msg_flags[i].name);
+               left &= ~msg_flags[i].bit;
+       }
+
+       if (left)
+               len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", left);
+
+       return len;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
new file mode 100644 (file)
index 0000000..0f3679e
--- /dev/null
@@ -0,0 +1,67 @@
+
+/*
+ * Print the open flags in arg->val as '|' separated O_* names (prefix
+ * stripped); any bits left unnamed are appended in hex. A value of zero is
+ * printed as "RDONLY". When O_CREAT is not set, the bit for the argument
+ * that follows this one (the mode) is set in arg->mask.
+ */
+static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
+                                              struct syscall_arg *arg)
+{
+       int printed = 0, flags = arg->val;
+
+       if (!(flags & O_CREAT))
+               arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
+
+       if (flags == 0)
+               return scnprintf(bf, size, "RDONLY");
+#define        P_FLAG(n) \
+       if (flags & O_##n) { \
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+               flags &= ~O_##n; \
+       }
+
+       P_FLAG(APPEND);
+       P_FLAG(ASYNC);
+       P_FLAG(CLOEXEC);
+       P_FLAG(CREAT);
+       P_FLAG(DIRECT);
+       P_FLAG(DIRECTORY);
+       P_FLAG(EXCL);
+       P_FLAG(LARGEFILE);
+       P_FLAG(NOATIME);
+       P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+       P_FLAG(NONBLOCK);
+#elif defined(O_NDELAY)
+       P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+       P_FLAG(PATH);
+#endif
+       P_FLAG(RDWR);
+#ifdef O_DSYNC
+       /*
+        * O_SYNC may be a superset of O_DSYNC: test for the full mask first and
+        * clear it once printed, so its bits are not repeated in hex below.
+        */
+       if ((flags & O_SYNC) == O_SYNC) {
+               printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+               flags &= ~O_SYNC;
+       } else {
+               P_FLAG(DSYNC);
+       }
+#else
+       P_FLAG(SYNC);
+#endif
+       P_FLAG(TRUNC);
+       P_FLAG(WRONLY);
+#undef P_FLAG
+
+       if (flags)
+               printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+       return printed;
+}
+
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
new file mode 100644 (file)
index 0000000..311f09d
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP         (1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT           (1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP          (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC          (1UL << 3) /* O_CLOEXEC */
+#endif
+
+/*
+ * Decode a PERF_FLAG_* value: nothing is printed for zero, otherwise the
+ * names (prefix stripped) of the bits that are set, '|' separated and in
+ * table order, followed by any remaining unnamed bits in hex.
+ */
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+                                               struct syscall_arg *arg)
+{
+#define PF_ENTRY(n) { PERF_FLAG_##n, #n }
+       static const struct {
+               unsigned long bit;
+               const char   *name;
+       } perf_flags[] = {
+               PF_ENTRY(FD_NO_GROUP), PF_ENTRY(FD_OUTPUT),
+               PF_ENTRY(PID_CGROUP), PF_ENTRY(FD_CLOEXEC),
+       };
+#undef PF_ENTRY
+       int len = 0, left = arg->val;
+       size_t i;
+
+       if (left == 0)
+               return 0;
+
+       for (i = 0; i < sizeof(perf_flags) / sizeof(perf_flags[0]); i++) {
+               if (!(left & perf_flags[i].bit))
+                       continue;
+               len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", perf_flags[i].name);
+               left &= ~perf_flags[i].bit;
+       }
+
+       if (left)
+               len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", left);
+
+       return len;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
new file mode 100644 (file)
index 0000000..07486ea
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Print the pid in arg->val, followed by " (comm)" when the thread looked up
+ * for that pid has comm_set (thread__set_comm_from_proc() is tried first if
+ * it does not).
+ */
+static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
+{
+       struct trace *trace = arg->trace;
+       int pid = arg->val;
+       size_t len = scnprintf(bf, size, "%d", pid);
+       struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
+
+       if (thread == NULL)
+               return len;
+
+       if (!thread->comm_set)
+               thread__set_comm_from_proc(thread);
+
+       if (thread->comm_set)
+               len += scnprintf(bf + len, size - len, " (%s)", thread__comm_str(thread));
+
+       thread__put(thread);
+       return len;
+}
+
+#define SCA_PID syscall_arg__scnprintf_pid
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
new file mode 100644 (file)
index 0000000..c205bc6
--- /dev/null
@@ -0,0 +1,45 @@
+#include <sched.h>
+
+/*
+ * Not defined anywhere else, probably, just to make sure we
+ * catch future flags
+ */
+#define SCHED_POLICY_MASK 0xff
+
+#ifndef SCHED_DEADLINE
+#define SCHED_DEADLINE 6
+#endif
+
+/*
+ * Split arg->val into a policy (the SCHED_POLICY_MASK bits) and flags (the
+ * rest). The policy is printed by name when it is <= SCHED_DEADLINE, in hex
+ * otherwise; RESET_ON_FORK and any remaining flag bits follow, '|' separated.
+ */
+static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size,
+                                                 struct syscall_arg *arg)
+{
+       static const char * const names[] = {
+               "NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE",
+       };
+       const int val = arg->val;
+       int policy = val & SCHED_POLICY_MASK;
+       int extra = val & ~SCHED_POLICY_MASK;
+       size_t len;
+
+       if (policy > SCHED_DEADLINE)
+               len = scnprintf(bf, size, "%#x", policy);
+       else
+               len = scnprintf(bf, size, "%s", names[policy]);
+
+       if (extra & SCHED_RESET_ON_FORK) {
+               len += scnprintf(bf + len, size - len, "|%s", "RESET_ON_FORK");
+               extra &= ~SCHED_RESET_ON_FORK;
+       }
+
+       if (extra)
+               len += scnprintf(bf + len, size - len, "|%#x", extra);
+
+       return len;
+}
+
+#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
new file mode 100644 (file)
index 0000000..d3b0b1f
--- /dev/null
@@ -0,0 +1,58 @@
+
+/*
+ * Print the signal number in arg->val as its SIG* name (prefix stripped),
+ * falling back to hex for values not listed here.
+ */
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+       int sig = arg->val;
+
+       switch (sig) {
+#define        P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+       P_SIGNUM(HUP);
+       P_SIGNUM(INT);
+       P_SIGNUM(QUIT);
+       P_SIGNUM(ILL);
+       P_SIGNUM(TRAP);
+       P_SIGNUM(ABRT);
+       P_SIGNUM(BUS);
+       P_SIGNUM(FPE);
+       P_SIGNUM(KILL);
+       P_SIGNUM(USR1);
+       P_SIGNUM(SEGV);
+       P_SIGNUM(USR2);
+       P_SIGNUM(PIPE);
+       P_SIGNUM(ALRM);
+       P_SIGNUM(TERM);
+       P_SIGNUM(CHLD);
+       P_SIGNUM(CONT);
+       P_SIGNUM(STOP);
+       P_SIGNUM(TSTP);
+       P_SIGNUM(TTIN);
+       P_SIGNUM(TTOU);
+       P_SIGNUM(URG);
+       P_SIGNUM(XCPU);
+       P_SIGNUM(XFSZ);
+       P_SIGNUM(VTALRM);
+       P_SIGNUM(PROF);
+       P_SIGNUM(WINCH);
+       P_SIGNUM(IO);
+       P_SIGNUM(PWR);
+       P_SIGNUM(SYS);
+#ifdef SIGEMT
+       P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+       P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+       P_SIGNUM(SWI);
+#endif
+#undef P_SIGNUM
+       default: break;
+       }
+
+       return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
new file mode 100644 (file)
index 0000000..0a5ce81
--- /dev/null
@@ -0,0 +1,66 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP             6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC          02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK         00004000
+#endif
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+/*
+ * Split arg->val into a socket type (the SOCK_TYPE_MASK bits) and flags (the
+ * rest). The type is printed by name, or in hex when not listed; CLOEXEC,
+ * NONBLOCK and any remaining flag bits follow, '|' separated.
+ */
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+       const int val = arg->val;
+       int type = val & SOCK_TYPE_MASK;
+       int extra = val & ~SOCK_TYPE_MASK;
+       size_t len;
+
+       /*
+        * Can't use a strarray, MIPS may override for ABI reasons.
+        */
+       switch (type) {
+#define        SK_TYPE_CASE(n) case SOCK_##n: len = scnprintf(bf, size, "%s", #n); break
+       SK_TYPE_CASE(STREAM);
+       SK_TYPE_CASE(DGRAM);
+       SK_TYPE_CASE(RAW);
+       SK_TYPE_CASE(RDM);
+       SK_TYPE_CASE(SEQPACKET);
+       SK_TYPE_CASE(DCCP);
+       SK_TYPE_CASE(PACKET);
+#undef SK_TYPE_CASE
+       default:
+               len = scnprintf(bf, size, "%#x", type);
+               break;
+       }
+
+       if (extra & SOCK_CLOEXEC) {
+               len += scnprintf(bf + len, size - len, "|%s", "CLOEXEC");
+               extra &= ~SOCK_CLOEXEC;
+       }
+
+       if (extra & SOCK_NONBLOCK) {
+               len += scnprintf(bf + len, size - len, "|%s", "NONBLOCK");
+               extra &= ~SOCK_NONBLOCK;
+       }
+
+       if (extra)
+               len += scnprintf(bf + len, size - len, "|%#x", extra);
+
+       return len;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
new file mode 100644 (file)
index 0000000..7942724
--- /dev/null
@@ -0,0 +1,32 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/*
+ * Print the wait options in arg->val as '|' separated names (leading 'W'
+ * stripped); any bits left unnamed are appended in hex.
+ */
+static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
+                                                   struct syscall_arg *arg)
+{
+       int len = 0, left = arg->val;
+
+#define        EMIT_WOPT(n) \
+       do { \
+               if (!(left & W##n)) \
+                       break; \
+               len += scnprintf(bf + len, size - len, "%s%s", len ? "|" : "", #n); \
+               left &= ~W##n; \
+       } while (0)
+
+       EMIT_WOPT(NOHANG);
+       EMIT_WOPT(UNTRACED);
+       EMIT_WOPT(CONTINUED);
+#undef EMIT_WOPT
+
+       if (left != 0)
+               len += scnprintf(bf + len, size - len, "%s%#x", len ? "|" : "", left);
+
+       return len;
+}
+
+#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options
index 2a83414159a65a026195f102c164ed6f6eae45be..538bae880bfee592f9ed962a4b99b7a19027c7a8 100644 (file)
@@ -1607,9 +1607,8 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
 
                        ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
                        dummy_hpp.buf[ret] = '\0';
-                       rtrim(dummy_hpp.buf);
 
-                       start = ltrim(dummy_hpp.buf);
+                       start = trim(dummy_hpp.buf);
                        ret = strlen(start);
 
                        if (start != dummy_hpp.buf)
@@ -1897,11 +1896,10 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
        bool first = true;
        int ret;
 
-       if (symbol_conf.use_callchain)
+       if (symbol_conf.use_callchain) {
                folded_sign = hist_entry__folded(he);
-
-       if (symbol_conf.use_callchain)
                printed += fprintf(fp, "%c ", folded_sign);
+       }
 
        hists__for_each_format(browser->hists, fmt) {
                if (perf_hpp__should_skip(fmt, he->hists))
@@ -2137,7 +2135,7 @@ static int hists__browser_title(struct hists *hists,
                printed += snprintf(bf + printed, size - printed,
                                    ", UID: %s", hists->uid_filter_str);
        if (thread) {
-               if (sort__has_thread) {
+               if (hists__has(hists, thread)) {
                        printed += scnprintf(bf + printed, size - printed,
                                    ", Thread: %s(%d)",
                                     (thread->comm_set ? thread__comm_str(thread) : ""),
@@ -2322,7 +2320,8 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 {
        struct thread *thread = act->thread;
 
-       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+       if ((!hists__has(browser->hists, thread) &&
+            !hists__has(browser->hists, comm)) || thread == NULL)
                return 0;
 
        if (browser->hists->thread_filter) {
@@ -2331,7 +2330,7 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
                thread__zput(browser->hists->thread_filter);
                ui_helpline__pop();
        } else {
-               if (sort__has_thread) {
+               if (hists__has(browser->hists, thread)) {
                        ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
                                           thread->comm_set ? thread__comm_str(thread) : "",
                                           thread->tid);
@@ -2356,10 +2355,11 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act,
 {
        int ret;
 
-       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+       if ((!hists__has(browser->hists, thread) &&
+            !hists__has(browser->hists, comm)) || thread == NULL)
                return 0;
 
-       if (sort__has_thread) {
+       if (hists__has(browser->hists, thread)) {
                ret = asprintf(optstr, "Zoom %s %s(%d) thread",
                               browser->hists->thread_filter ? "out of" : "into",
                               thread->comm_set ? thread__comm_str(thread) : "",
@@ -2382,7 +2382,7 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
        struct map *map = act->ms.map;
 
-       if (!sort__has_dso || map == NULL)
+       if (!hists__has(browser->hists, dso) || map == NULL)
                return 0;
 
        if (browser->hists->dso_filter) {
@@ -2409,7 +2409,7 @@ static int
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
            char **optstr, struct map *map)
 {
-       if (!sort__has_dso || map == NULL)
+       if (!hists__has(browser->hists, dso) || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Zoom %s %s DSO",
@@ -2431,10 +2431,10 @@ do_browse_map(struct hist_browser *browser __maybe_unused,
 }
 
 static int
-add_map_opt(struct hist_browser *browser __maybe_unused,
+add_map_opt(struct hist_browser *browser,
            struct popup_action *act, char **optstr, struct map *map)
 {
-       if (!sort__has_dso || map == NULL)
+       if (!hists__has(browser->hists, dso) || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Browse map details") < 0)
@@ -2536,7 +2536,7 @@ add_exit_opt(struct hist_browser *browser __maybe_unused,
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
-       if (!sort__has_socket || act->socket < 0)
+       if (!hists__has(browser->hists, socket) || act->socket < 0)
                return 0;
 
        if (browser->hists->socket_filter > -1) {
@@ -2558,7 +2558,7 @@ static int
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
               char **optstr, int socket_id)
 {
-       if (!sort__has_socket || socket_id < 0)
+       if (!hists__has(browser->hists, socket) || socket_id < 0)
                return 0;
 
        if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -2749,7 +2749,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                         */
                        goto out_free_stack;
                case 'a':
-                       if (!sort__has_sym) {
+                       if (!hists__has(hists, sym)) {
                                ui_browser__warning(&browser->b, delay_secs * 2,
                        "Annotation is only available for symbolic views, "
                        "include \"sym*\" in --sort to use it.");
@@ -2912,7 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        continue;
                }
 
-               if (!sort__has_sym || browser->selection == NULL)
+               if (!hists__has(hists, sym) || browser->selection == NULL)
                        goto skip_annotation;
 
                if (sort__mode == SORT_MODE__BRANCH) {
@@ -2956,7 +2956,7 @@ skip_annotation:
                        goto skip_scripting;
 
                if (browser->he_selection) {
-                       if (sort__has_thread && thread) {
+                       if (hists__has(hists, thread) && thread) {
                                nr_options += add_script_opt(browser,
                                                             &actions[nr_options],
                                                             &options[nr_options],
@@ -2971,7 +2971,7 @@ skip_annotation:
                         *
                         * See hist_browser__show_entry.
                         */
-                       if (sort__has_sym && browser->selection->sym) {
+                       if (hists__has(hists, sym) && browser->selection->sym) {
                                nr_options += add_script_opt(browser,
                                                             &actions[nr_options],
                                                             &options[nr_options],
index 2aa45b606fa4199a82a2e31244d26265feb6c021..932adfaa05af2cf2ea8860f712244d4d647adea3 100644 (file)
@@ -379,7 +379,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
                        gtk_tree_store_set(store, &iter, col_idx++, s, -1);
                }
 
-               if (symbol_conf.use_callchain && sort__has_sym) {
+               if (symbol_conf.use_callchain && hists__has(hists, sym)) {
                        if (callchain_param.mode == CHAIN_GRAPH_REL)
                                total = symbol_conf.cumulate_callchain ?
                                        h->stat_acc->period : h->stat.period;
index 3baeaa6e71b5a51e113b8b485df97b7c8ae003a2..af07ffb129ca5ba24b2459fee6ee659d8c71e97f 100644 (file)
@@ -635,7 +635,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
                ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
        }
 
-       if (verbose && sort__has_sym) /* Addr + origin */
+       if (verbose && hists__has(hists, sym)) /* Addr + origin */
                ret += 3 + BITS_PER_LONG / 4;
 
        return ret;
index 7aff5acf3265782e03254de2d8bc1dfafb56e03a..560eb47d56f945acbbdc188d6a8fb3e407faf41e 100644 (file)
@@ -569,9 +569,8 @@ static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
                        first_col = false;
 
                        fmt->header(fmt, hpp, hists_to_evsel(hists));
-                       rtrim(hpp->buf);
 
-                       header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+                       header_width += fprintf(fp, "%s", trim(hpp->buf));
                }
        }
 
index da48fd843438f97d321cd792be5b05834afefb52..027bb2b89d7ff8f0f3c755e9253efa3c4cf73d86 100644 (file)
@@ -8,6 +8,7 @@ libperf-y += env.o
 libperf-y += event.o
 libperf-y += evlist.o
 libperf-y += evsel.o
+libperf-y += evsel_fprintf.o
 libperf-y += find_bit.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
@@ -29,6 +30,7 @@ libperf-y += usage.o
 libperf-y += wrapper.o
 libperf-y += dso.o
 libperf-y += symbol.o
+libperf-y += symbol_fprintf.o
 libperf-y += color.o
 libperf-y += header.o
 libperf-y += callchain.o
@@ -38,6 +40,7 @@ libperf-y += machine.o
 libperf-y += map.o
 libperf-y += pstack.o
 libperf-y += session.o
+libperf-$(CONFIG_AUDIT) += syscalltbl.o
 libperf-y += ordered-events.o
 libperf-y += comm.o
 libperf-y += thread.o
@@ -69,9 +72,9 @@ libperf-y += stat-shadow.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
-libperf-$(CONFIG_X86) += tsc.o
-libperf-$(CONFIG_AUXTRACE) += tsc.o
+libperf-y += tsc.o
 libperf-y += cloexec.o
+libperf-y += call-path.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
index b795b6994144cc983d09d9c2422ab37888c2e44e..d4b3d034c5031bdbf9a1dcc099812778f4e603c8 100644 (file)
@@ -1665,5 +1665,5 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize)
 
 bool ui__has_annotation(void)
 {
-       return use_browser == 1 && sort__has_sym;
+       return use_browser == 1 && perf_hpp_list.sym;
 }
index ec164fe70718df1480b02733d8701c7ab2b74297..c9169011e55ef84bf52728f3a0853d68c8120702 100644 (file)
@@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
        synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
        synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
        synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+       synth_opts->initial_skip = 0;
 }
 
 /*
@@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
                                synth_opts->last_branch_sz = val;
                        }
                        break;
+               case 's':
+                       synth_opts->initial_skip = strtoul(p, &endptr, 10);
+                       if (p == endptr)
+                               goto out_err;
+                       p = endptr;
+                       break;
                case ' ':
                case ',':
                        break;
index 57ff31ecb8e40f85bd60b876925394172d6843d8..767989e0e3126714fd956df13df7709098aadc5c 100644 (file)
@@ -68,6 +68,7 @@ enum itrace_period_type {
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
  */
 struct itrace_synth_opts {
        bool                    set;
@@ -86,6 +87,7 @@ struct itrace_synth_opts {
        unsigned int            last_branch_sz;
        unsigned long long      period;
        enum itrace_period_type period_type;
+       unsigned long           initial_skip;
 };
 
 /**
index 0967ce601931685ed294827e8aef7c30c47736c6..493307d1414ced463a935ae30ea00bc85c3585e8 100644 (file)
@@ -842,6 +842,58 @@ bpf_map_op__new(struct parse_events_term *term)
        return op;
 }
 
+/* Copy @op; a BPF_MAP_KEY_RANGES key gets its own copy of the ranges array. */
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+       struct bpf_map_op *copy = memdup(op, sizeof(*op));
+
+       if (copy == NULL) {
+               pr_debug("Failed to alloc bpf_map_op\n");
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&copy->list);
+       if (op->key_type != BPF_MAP_KEY_RANGES)
+               return copy;
+
+       /* Give the copy its own ranges array instead of sharing op's. */
+       copy->k.array.ranges = memdup(op->k.array.ranges,
+                                     op->k.array.nr_ranges *
+                                     sizeof(op->k.array.ranges[0]));
+       if (copy->k.array.ranges == NULL) {
+               pr_debug("Failed to alloc indices for map\n");
+               free(copy);
+               return NULL;
+       }
+       return copy;
+}
+
+/* Clone @priv along with every op on its ops_list; NULL if an allocation fails. */
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+       struct bpf_map_priv *copy = zalloc(sizeof(*copy));
+       struct bpf_map_op *op, *op_copy;
+
+       if (copy == NULL) {
+               pr_debug("No enough memory to alloc map private\n");
+               return NULL;
+       }
+       INIT_LIST_HEAD(&copy->ops_list);
+       list_for_each_entry(op, &priv->ops_list, list) {
+               op_copy = bpf_map_op__clone(op);
+               if (op_copy == NULL) {
+                       /* NOTE(review): confirm purge also frees 'copy' itself */
+                       bpf_map_priv__purge(copy);
+                       return NULL;
+               }
+               list_add_tail(&op_copy->list, &copy->ops_list);
+       }
+
+       return copy;
+}
+
 static int
 bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
 {
@@ -1417,6 +1469,89 @@ int bpf__apply_obj_config(void)
        return 0;
 }
 
+/*
+ * Visit every map of every object walked by bpf_object__for_each_safe();
+ * 'objtmp' is the extra cursor that macro requires.
+ */
+#define bpf__for_each_map(pos, obj, objtmp)    \
+       bpf_object__for_each_safe(obj, objtmp)  \
+               bpf_map__for_each(pos, obj)
+
+/* Same walk, restricted to maps whose name is exactly "__bpf_stdout__". */
+#define bpf__for_each_stdout_map(pos, obj, objtmp)     \
+       bpf__for_each_map(pos, obj, objtmp)             \
+               if (bpf_map__get_name(pos) &&           \
+                       (strcmp("__bpf_stdout__",       \
+                               bpf_map__get_name(pos)) == 0))
+
+/*
+ * Make sure every map named "__bpf_stdout__" has private data.
+ *
+ * A first pass finds out whether any such map lacks private data and
+ * remembers the first map that has it as a template.  Maps without private
+ * data then either get a clone of the template or, when no template exists,
+ * a BPF_MAP_OP_SET_EVSEL operation pointing at a "bpf-output" event that is
+ * added to @evlist here.
+ *
+ * Returns 0 on success (including when nothing had to be done), otherwise a
+ * non-zero error value.
+ */
+int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+       struct bpf_map_priv *tmpl_priv = NULL;
+       struct bpf_object *obj, *tmp;
+       struct perf_evsel *evsel = NULL;
+       struct bpf_map *map;
+       int err;
+       bool need_init = false;
+
+       /* Pass 1: is there anything to set up, and is there a template? */
+       bpf__for_each_stdout_map(map, obj, tmp) {
+               struct bpf_map_priv *priv;
+
+               err = bpf_map__get_private(map, (void **)&priv);
+               if (err)
+                       return -BPF_LOADER_ERRNO__INTERNAL;
+
+               /*
+                * No need to check map type: type should have been
+                * verified by kernel.
+                */
+               if (!need_init && !priv)
+                       need_init = !priv;
+               if (!tmpl_priv && priv)
+                       tmpl_priv = priv;
+       }
+
+       if (!need_init)
+               return 0;
+
+       /* No configured map to copy from: create the output event itself. */
+       if (!tmpl_priv) {
+               err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
+                                  NULL);
+               if (err) {
+                       pr_debug("ERROR: failed to create bpf-output event\n");
+                       return -err;
+               }
+
+               evsel = perf_evlist__last(evlist);
+       }
+
+       /* Pass 2: configure every stdout map that has no private data yet. */
+       bpf__for_each_stdout_map(map, obj, tmp) {
+               struct bpf_map_priv *priv;
+
+               err = bpf_map__get_private(map, (void **)&priv);
+               if (err)
+                       return -BPF_LOADER_ERRNO__INTERNAL;
+               if (priv)
+                       continue;
+
+               if (tmpl_priv) {
+                       priv = bpf_map_priv__clone(tmpl_priv);
+                       if (!priv)
+                               return -ENOMEM;
+
+                       err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+                       if (err) {
+                               /* the map did not take the clone: dispose of it */
+                               bpf_map_priv__clear(map, priv);
+                               return err;
+                       }
+               } else if (evsel) {
+                       struct bpf_map_op *op;
+
+                       op = bpf_map__add_newop(map, NULL);
+                       if (IS_ERR(op))
+                               return PTR_ERR(op);
+                       op->op_type = BPF_MAP_OP_SET_EVSEL;
+                       op->v.evsel = evsel;
+               }
+       }
+
+       return 0;
+}
+
 #define ERRNO_OFFSET(e)                ((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)      ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO       (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -1590,3 +1725,11 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
        bpf__strerror_end(buf, size);
        return 0;
 }
+
+/*
+ * Format @err into @buf using only the common bpf__strerror_head() and
+ * bpf__strerror_end() helpers; no extra cases are handled in between.
+ * @evlist is unused.  Always returns 0.
+ */
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+                              int err, char *buf, size_t size)
+{
+       bpf__strerror_head(err, buf, size);
+       bpf__strerror_end(buf, size);
+       return 0;
+}
index be4311944e3daa2abc87cdd54be72ed7bce70682..941e17275aa7d747dfeef655c21a364e627f6f95 100644 (file)
@@ -79,6 +79,11 @@ int bpf__strerror_config_obj(struct bpf_object *obj,
                             size_t size);
 int bpf__apply_obj_config(void);
 int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,
+                              char *buf, size_t size);
+
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -124,6 +129,12 @@ bpf__apply_obj_config(void)
        return 0;
 }
 
+/* Stub for the #else branch of this header: nothing to set up, returns 0. */
+static inline int
+bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+       return 0;
+}
+
 static inline int
 __bpf_strerror(char *buf, size_t size)
 {
@@ -177,5 +188,13 @@ bpf__strerror_apply_obj_config(int err __maybe_unused,
 {
        return __bpf_strerror(buf, size);
 }
+
+/*
+ * Stub for the #else branch of this header: ignores @evlist and @err and
+ * defers to __bpf_strerror() to fill @buf.
+ */
+static inline int
+bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+                          int err __maybe_unused, char *buf,
+                          size_t size)
+{
+       return __bpf_strerror(buf, size);
+}
 #endif
 #endif
index 0573c2ec861d9de0a04dbb71862e0ed85e255ff1..b6ecf87bc3e3c32dac43a0b3f521a72ff3e28ecd 100644 (file)
@@ -261,14 +261,14 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
 
                if (dso__is_vdso(pos)) {
                        name = pos->short_name;
-                       name_len = pos->short_name_len + 1;
+                       name_len = pos->short_name_len;
                } else if (dso__is_kcore(pos)) {
                        machine__mmap_name(machine, nm, sizeof(nm));
                        name = nm;
-                       name_len = strlen(nm) + 1;
+                       name_len = strlen(nm);
                } else {
                        name = pos->long_name;
-                       name_len = pos->long_name_len + 1;
+                       name_len = pos->long_name_len;
                }
 
                in_kernel = pos->kernel ||
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644 (file)
index 0000000..904a170
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * call-path.c: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+#include "util.h"
+#include "call-path.h"
+
+/*
+ * Fill in a call path node.  The ip is recorded only when there is no
+ * symbol; the node starts with db_id 0, an empty children tree and a
+ * cleared rb_node.
+ */
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+                           struct symbol *sym, u64 ip, bool in_kernel)
+{
+       /* what the node stands for */
+       cp->sym = sym;
+       cp->ip = ip;
+       if (sym)
+               cp->ip = 0;
+       cp->in_kernel = in_kernel;
+       cp->db_id = 0;
+
+       /* where the node sits in the tree */
+       cp->parent = parent;
+       cp->children = RB_ROOT;
+       RB_CLEAR_NODE(&cp->rb_node);
+}
+
+/*
+ * Allocate a call path root with zalloc(), initialise its embedded root
+ * node and its (empty) list of blocks.  Returns NULL if allocation fails.
+ */
+struct call_path_root *call_path_root__new(void)
+{
+       struct call_path_root *root = zalloc(sizeof(struct call_path_root));
+
+       if (root) {
+               call_path__init(&root->call_path, NULL, NULL, 0, false);
+               INIT_LIST_HEAD(&root->blocks);
+       }
+
+       return root;
+}
+
+/*
+ * Free every block on cpr->blocks and then the root itself.  Like free(),
+ * this is a no-op when @cpr is NULL.
+ */
+void call_path_root__free(struct call_path_root *cpr)
+{
+       struct call_path_block *pos, *n;
+
+       if (!cpr)
+               return;
+
+       list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+               list_del(&pos->node);
+               free(pos);
+       }
+       free(cpr);
+}
+
+/*
+ * Hand out the next call_path slot, appending a new block to cpr->blocks
+ * when all existing slots are taken, and initialise it with the given
+ * parent/symbol/ip.  Returns NULL if a new block cannot be allocated.
+ */
+static struct call_path *call_path__new(struct call_path_root *cpr,
+                                       struct call_path *parent,
+                                       struct symbol *sym, u64 ip,
+                                       bool in_kernel)
+{
+       struct call_path_block *block;
+       struct call_path *slot;
+
+       if (cpr->next >= cpr->sz) {
+               /* every slot is in use: grow by one block */
+               block = zalloc(sizeof(struct call_path_block));
+               if (block == NULL)
+                       return NULL;
+               list_add_tail(&block->node, &cpr->blocks);
+               cpr->sz += CALL_PATH_BLOCK_SIZE;
+       } else {
+               block = list_last_entry(&cpr->blocks, struct call_path_block,
+                                       node);
+       }
+
+       /* low bits of the running counter index into the newest block */
+       slot = &block->cp[cpr->next & CALL_PATH_BLOCK_MASK];
+       cpr->next++;
+
+       call_path__init(slot, parent, sym, ip, in_kernel);
+
+       return slot;
+}
+
+/*
+ * Look up the child of @parent that matches (@sym, @ip) and create it if it
+ * does not exist yet.  When @sym is set the ip is ignored (treated as 0).
+ * @ks is the address from which ips count as kernel addresses.
+ *
+ * With a NULL @parent a fresh node is always allocated and is not linked
+ * into any tree.  Returns NULL if a node cannot be allocated.
+ */
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+                                    struct call_path *parent,
+                                    struct symbol *sym, u64 ip, u64 ks)
+{
+       struct rb_node **p;
+       struct rb_node *node_parent = NULL;
+       struct call_path *cp;
+       /* computed from the caller's ip, before it is cleared below */
+       bool in_kernel = ip >= ks;
+
+       if (sym)
+               ip = 0;
+
+       if (!parent)
+               return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+       /* children are ordered by symbol pointer, then by ip */
+       p = &parent->children.rb_node;
+       while (*p != NULL) {
+               node_parent = *p;
+               cp = rb_entry(node_parent, struct call_path, rb_node);
+
+               if (cp->sym == sym && cp->ip == ip)
+                       return cp;
+
+               if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
+       }
+
+       /* not found: p now points at the empty link where the node belongs */
+       cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+       if (!cp)
+               return NULL;
+
+       rb_link_node(&cp->rb_node, node_parent, p);
+       rb_insert_color(&cp->rb_node, &parent->children);
+
+       return cp;
+}
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644 (file)
index 0000000..477f6d0
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether function is in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+       struct call_path *parent;
+       struct symbol *sym;
+       u64 ip;
+       u64 db_id;
+       bool in_kernel;
+       struct rb_node rb_node;
+       struct rb_root children;
+};
+
+/* Call paths are allocated in blocks of 1 << CALL_PATH_BLOCK_SHIFT nodes. */
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+/*
+ * struct call_path_block - one allocation unit of call paths.
+ * @cp: the call path slots of this block
+ * @node: list node linking the block into a list of blocks
+ */
+struct call_path_block {
+       struct call_path cp[CALL_PATH_BLOCK_SIZE];
+       struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space (running count of call path slots handed out)
+ * @sz: number of spaces (total slots in the blocks allocated so far)
+ */
+struct call_path_root {
+       struct call_path call_path;
+       struct list_head blocks;
+       size_t next;
+       size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+                                    struct call_path *parent,
+                                    struct symbol *sym, u64 ip, u64 ks);
+
+#endif
index 24b4bd0d77545e7bb9f95e83222eb103c92f5151..07fd30bc2f816feeda146fe98343f5ce408884e8 100644 (file)
@@ -109,6 +109,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
        bool record_opt_set = false;
        bool try_stack_size = false;
 
+       callchain_param.enabled = true;
        symbol_conf.use_callchain = true;
 
        if (!arg)
@@ -117,6 +118,7 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
        while ((tok = strtok((char *)arg, ",")) != NULL) {
                if (!strncmp(tok, "none", strlen(tok))) {
                        callchain_param.mode = CHAIN_NONE;
+                       callchain_param.enabled = false;
                        symbol_conf.use_callchain = false;
                        return 0;
                }
@@ -788,7 +790,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
        return 0;
 }
 
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+int sample__resolve_callchain(struct perf_sample *sample,
+                             struct callchain_cursor *cursor, struct symbol **parent,
                              struct perf_evsel *evsel, struct addr_location *al,
                              int max_stack)
 {
@@ -796,8 +799,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent
                return 0;
 
        if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
-           sort__has_parent) {
-               return thread__resolve_callchain(al->thread, evsel, sample,
+           perf_hpp_list.parent) {
+               return thread__resolve_callchain(al->thread, cursor, evsel, sample,
                                                 parent, al, max_stack);
        }
        return 0;
index d2a9e694810c12c3a6c7fd8e89879b1424a87fa3..65e2a4f7cb4e810711fa549cb09977b686c08ee2 100644 (file)
@@ -212,7 +212,14 @@ struct hist_entry;
 int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
 int record_callchain_opt(const struct option *opt, const char *arg, int unset);
 
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+                                struct callchain_param *callchain,
+                                const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+                             struct callchain_cursor *cursor, struct symbol **parent,
                              struct perf_evsel *evsel, struct addr_location *al,
                              int max_stack);
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
index 4e727635476eadf5b105a7be4620f86a4bf46499..dad7d827216816b275112f1cb776396467071663 100644 (file)
@@ -13,6 +13,7 @@
 #include <subcmd/exec-cmd.h>
 #include "util/hist.h"  /* perf_hist_config */
 #include "util/llvm-utils.h"   /* perf_llvm_config */
+#include "config.h"
 
 #define MAXNAME (256)
 
@@ -377,6 +378,21 @@ const char *perf_config_dirname(const char *name, const char *value)
        return value;
 }
 
+/*
+ * Handle "buildid.*" config variables.  Only "buildid.dir" is acted upon:
+ * its directory value is copied (truncated to MAXPATHLEN-1 bytes) into
+ * buildid_dir.  Returns -1 if perf_config_dirname() yields no directory,
+ * 0 otherwise.
+ */
+static int perf_buildid_config(const char *var, const char *value)
+{
+       const char *dir;
+
+       /* same dir for all commands */
+       if (strcmp(var, "buildid.dir") != 0)
+               return 0;
+
+       dir = perf_config_dirname(var, value);
+       if (dir == NULL)
+               return -1;
+
+       strncpy(buildid_dir, dir, MAXPATHLEN-1);
+       buildid_dir[MAXPATHLEN-1] = '\0';
+       return 0;
+}
+
 static int perf_default_core_config(const char *var __maybe_unused,
                                    const char *value __maybe_unused)
 {
@@ -412,6 +428,9 @@ int perf_default_config(const char *var, const char *value,
        if (!prefixcmp(var, "llvm."))
                return perf_llvm_config(var, value);
 
+       if (!prefixcmp(var, "buildid."))
+               return perf_buildid_config(var, value);
+
        /* Add other config variables here. */
        return 0;
 }
@@ -506,41 +525,185 @@ out:
        return ret;
 }
 
-/*
- * Call this to report error for your variable that should not
- * get a boolean value (i.e. "[my] var" means "true").
- */
-int config_error_nonbool(const char *var)
+/* Return the section of @sections named @section_name, or NULL if none. */
+static struct perf_config_section *find_section(struct list_head *sections,
+                                               const char *section_name)
 {
-       return error("Missing value for '%s'", var);
+       struct perf_config_section *section;
+
+       list_for_each_entry(section, sections, node)
+               if (!strcmp(section->name, section_name))
+                       return section;
+
+       return NULL;
+}
+
+/* Return the item of @section named @name, or NULL if there is none. */
+static struct perf_config_item *find_config_item(const char *name,
+                                                struct perf_config_section *section)
+{
+       struct perf_config_item *item;
+
+       list_for_each_entry(item, &section->items, node)
+               if (!strcmp(item->name, name))
+                       return item;
+
+       return NULL;
 }
 
-struct buildid_dir_config {
-       char *dir;
-};
+/*
+ * Allocate an empty section with its own copy of @section_name and append
+ * it to @sections.  Returns the new section, or NULL if an allocation
+ * fails (nothing is added in that case).
+ */
+static struct perf_config_section *add_section(struct list_head *sections,
+                                              const char *section_name)
+{
+       struct perf_config_section *section = zalloc(sizeof(*section));
+
+       if (!section)
+               return NULL;
+
+       INIT_LIST_HEAD(&section->items);
+       section->name = strdup(section_name);
+       if (!section->name) {
+               pr_debug("%s: strdup failed\n", __func__);
+               free(section);
+               return NULL;
+       }
+
+       list_add_tail(&section->node, sections);
+       return section;
+}
 
-static int buildid_dir_command_config(const char *var, const char *value,
-                                     void *data)
+/*
+ * Allocate an item with its own copy of @name and no value yet, and append
+ * it to @section.  Returns the new item, or NULL if an allocation fails.
+ */
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+                                               const char *name)
 {
-       struct buildid_dir_config *c = data;
-       const char *v;
+       struct perf_config_item *item = zalloc(sizeof(*item));
 
-       /* same dir for all commands */
-       if (!strcmp(var, "buildid.dir")) {
-               v = perf_config_dirname(var, value);
-               if (!v)
-                       return -1;
-               strncpy(c->dir, v, MAXPATHLEN-1);
-               c->dir[MAXPATHLEN-1] = '\0';
+       if (!item)
+               return NULL;
+
+       item->name = strdup(name);
+       if (!item->name) {
+               pr_debug("%s: strdup failed\n", __func__);
+               free(item);
+               return NULL;
        }
+
+       list_add_tail(&item->node, &section->items);
+       return item;
+}
+
+/*
+ * Replace the value of @item with a copy of @value.  The old value is only
+ * released once the copy has been made, so @item is unchanged on failure.
+ * Returns 0 on success, -1 if strdup() fails.
+ */
+static int set_value(struct perf_config_item *item, const char *value)
+{
+       char *val = strdup(value);
+
+       if (!val)
+               return -1;
+
+       zfree(&item->value);
+       item->value = val;
        return 0;
 }
 
-static void check_buildid_dir_config(void)
+/*
+ * perf_config() callback: file the "section.name" variable @var with @value
+ * into the perf_config_set passed as @perf_config_set, creating the section
+ * and the item on first use and replacing the value of an existing item.
+ *
+ * Returns 0 on success and -1 on failure (no '.' in @var, NULL @value or a
+ * failed allocation).  The set is never deleted here: perf_config_set__new()
+ * still hands it to its caller, who owns it.
+ */
+static int collect_config(const char *var, const char *value,
+                         void *perf_config_set)
 {
-       struct buildid_dir_config c;
-       c.dir = buildid_dir;
-       perf_config(buildid_dir_command_config, &c);
+       int ret = -1;
+       char *ptr, *key;
+       char *section_name, *name;
+       struct perf_config_section *section = NULL;
+       struct perf_config_item *item = NULL;
+       struct perf_config_set *set = perf_config_set;
+       struct list_head *sections = &set->sections;
+
+       /* strsep() modifies its input, so split a private copy of @var */
+       key = ptr = strdup(var);
+       if (!key) {
+               pr_debug("%s: strdup failed\n", __func__);
+               return -1;
+       }
+
+       section_name = strsep(&ptr, ".");
+       name = ptr;
+       if (name == NULL || value == NULL)
+               goto out_free;
+
+       section = find_section(sections, section_name);
+       if (!section) {
+               section = add_section(sections, section_name);
+               if (!section)
+                       goto out_free;
+       }
+
+       item = find_config_item(name, section);
+       if (!item) {
+               item = add_config_item(section, name);
+               if (!item)
+                       goto out_free;
+       }
+
+       ret = set_value(item, value);
+
+out_free:
+       /*
+        * section_name and name point into key, but the section and item
+        * keep their own strdup()ed copies, so key is released on every
+        * path, success included.
+        */
+       free(key);
+       return ret;
+}
+
+/*
+ * Allocate a config set and fill it by running perf_config() with
+ * collect_config() as the callback.  The return value of perf_config() is
+ * not checked.  Returns NULL only if the set itself cannot be allocated.
+ */
+struct perf_config_set *perf_config_set__new(void)
+{
+       struct perf_config_set *set = zalloc(sizeof(*set));
+
+       if (set) {
+               INIT_LIST_HEAD(&set->sections);
+               perf_config(collect_config, set);
+       }
+
+       return set;
+}
+
+/* Free @item together with its name and value strings. */
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+       zfree(&item->name);
+       zfree(&item->value);
+       free(item);
+}
+
+/* Unlink and delete every item of @section; the section itself is kept. */
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+       struct perf_config_item *item, *tmp;
+
+       /* _safe variant: entries are removed while walking the list */
+       list_for_each_entry_safe(item, tmp, &section->items, node) {
+               list_del_init(&item->node);
+               perf_config_item__delete(item);
+       }
+}
+
+/* Delete all items of @section, then its name and the section itself. */
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+       perf_config_section__purge(section);
+       zfree(&section->name);
+       free(section);
+}
+
+/* Unlink and delete every section of @set; the set itself is kept. */
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+       struct perf_config_section *section, *tmp;
+
+       /* _safe variant: entries are removed while walking the list */
+       list_for_each_entry_safe(section, tmp, &set->sections, node) {
+               list_del_init(&section->node);
+               perf_config_section__delete(section);
+       }
+}
+
+/*
+ * Delete all sections of @set and free the set.  A NULL @set is accepted,
+ * since perf_config_set__new() returns NULL when its allocation fails.
+ */
+void perf_config_set__delete(struct perf_config_set *set)
+{
+       if (set == NULL)
+               return;
+
+       perf_config_set__purge(set);
+       free(set);
+}
+
+/*
+ * Call this to report error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+       return error("Missing value for '%s'", var);
 }
 
 void set_buildid_dir(const char *dir)
@@ -548,16 +711,13 @@ void set_buildid_dir(const char *dir)
        if (dir)
                scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
 
-       /* try config file */
-       if (buildid_dir[0] == '\0')
-               check_buildid_dir_config();
-
        /* default to $HOME/.debug */
        if (buildid_dir[0] == '\0') {
-               char *v = getenv("HOME");
-               if (v) {
+               char *home = getenv("HOME");
+
+               if (home) {
                        snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
-                                v, DEBUG_CACHE_DIR);
+                                home, DEBUG_CACHE_DIR);
                } else {
                        strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
                }
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644 (file)
index 0000000..22ec626
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+/* One "name = value" entry of a config section. */
+struct perf_config_item {
+       char *name;
+       char *value;
+       struct list_head node;  /* entry in perf_config_section.items */
+};
+
+/* A named config section and the items collected for it. */
+struct perf_config_section {
+       char *name;
+       struct list_head items; /* list of struct perf_config_item */
+       struct list_head node;  /* entry in perf_config_set.sections */
+};
+
+/* All config sections gathered by perf_config_set__new(). */
+struct perf_config_set {
+       struct list_head sections;      /* list of struct perf_config_section */
+};
+
+struct perf_config_set *perf_config_set__new(void);
+void perf_config_set__delete(struct perf_config_set *set);
+
+#endif /* __PERF_CONFIG_H */
index 9bcf2bed3a6d1b7369ee4deee7f38e9c4abab06d..02d801670f30053fa1f6344f7a944bc8e2047842 100644 (file)
@@ -587,3 +587,15 @@ int cpu__setup_cpunode_map(void)
        closedir(dir1);
        return 0;
 }
+
+/* Tell whether @cpu is one of the first cpus->nr entries of cpus->map. */
+bool cpu_map__has(struct cpu_map *cpus, int cpu)
+{
+       int idx = 0;
+
+       while (idx < cpus->nr) {
+               if (cpus->map[idx++] == cpu)
+                       return true;
+       }
+
+       return false;
+}
index 81a2562aaa2b02261b88c960997238dc9e0925ab..1a0a35073ce1e8b52ce7a4bc07efdcadefbb45a3 100644 (file)
@@ -66,4 +66,6 @@ int cpu__get_node(int cpu);
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
                       int (*f)(struct cpu_map *map, int cpu, void *data),
                       void *data);
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu);
 #endif /* __PERF_CPUMAP_H */
index 1921942fc2e035cb2ddec0ba94b066e02a8f5bc9..be83516155ee5d7914d699b48df33441b7f6f345 100644 (file)
@@ -136,3 +136,44 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
 {
        return writen(file->fd, buf, size);
 }
+
+/*
+ * Rename the current output to "<path>.<postfix>" and, unless @at_exit is
+ * set, close the fd, open file->path again and seek the new fd to @pos.
+ * A failed rename only triggers a warning.
+ *
+ * Returns file->fd on success, -EINVAL for a pipe or a file opened for
+ * reading, -ENOMEM if the new name cannot be built, or the negative error
+ * from reopening or seeking.
+ */
+int perf_data_file__switch(struct perf_data_file *file,
+                          const char *postfix,
+                          size_t pos, bool at_exit)
+{
+       char *new_filepath;
+       int ret;
+
+       if (check_pipe(file))
+               return -EINVAL;
+       if (perf_data_file__is_read(file))
+               return -EINVAL;
+
+       if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0)
+               return -ENOMEM;
+
+       /*
+        * Only fire a warning, don't return error, continue fill
+        * original file.
+        */
+       if (rename(file->path, new_filepath))
+               pr_warning("Failed to rename %s to %s\n", file->path, new_filepath);
+
+       if (!at_exit) {
+               close(file->fd);
+               ret = perf_data_file__open(file);
+               if (ret < 0)
+                       goto out;
+
+               if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) {
+                       /* capture errno before anything can overwrite it */
+                       ret = -errno;
+                       pr_debug("Failed to lseek to %zu: %s\n",
+                                pos, strerror(errno));
+                       goto out;
+               }
+       }
+       ret = file->fd;
+out:
+       free(new_filepath);
+       return ret;
+}
index 2b15d0c95c7f312de08a36a05b08c8356b039616..ae510ce16cb1277721bd132438c08311febb7156 100644 (file)
@@ -46,5 +46,14 @@ int perf_data_file__open(struct perf_data_file *file);
 void perf_data_file__close(struct perf_data_file *file);
 ssize_t perf_data_file__write(struct perf_data_file *file,
                              void *buf, size_t size);
-
+/*
+ * If at_exit is set, only rename the current perf.data to
+ * perf.data.<postfix> and keep writing to the original file.
+ * Set at_exit when flushing the last output.
+ *
+ * Returns the fd of the new output, or a negative error code.
+ */
+int perf_data_file__switch(struct perf_data_file *file,
+                          const char *postfix,
+                          size_t pos, bool at_exit);
 #endif /* __PERF_DATA_H */
index 049438d51b9a496cd20dac37c63a41a0b3512a6a..f8e3057ae3b1903ff7c15f605d0319486c957b30 100644 (file)
@@ -23,6 +23,8 @@
 #include "event.h"
 #include "util.h"
 #include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
 #include "db-export.h"
 
 struct deferred_export {
@@ -276,6 +278,79 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
        return 0;
 }
 
+/*
+ * Resolve the callchain of @sample and make sure each of its entries has a
+ * node in dbe->cpr's call path tree, walking from the root of the chain
+ * towards the leaf.  The symbol and dso of every entry are passed to
+ * db_ids_from_al() on the way.
+ *
+ * callchain_param.order is temporarily forced to ORDER_CALLER and restored
+ * before returning.
+ *
+ * Returns the call path of the last entry, or NULL if callchains are not in
+ * use, the sample has none, the chain is empty, resolution fails or a call
+ * path node cannot be allocated.
+ */
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+                                              struct machine *machine,
+                                              struct thread *thread,
+                                              struct perf_sample *sample,
+                                              struct perf_evsel *evsel)
+{
+       u64 kernel_start = machine__kernel_start(machine);
+       struct call_path *current = &dbe->cpr->call_path;
+       enum chain_order saved_order = callchain_param.order;
+       int err;
+
+       if (!symbol_conf.use_callchain || !sample->callchain)
+               return NULL;
+
+       /*
+        * Since the call path tree must be built starting with the root, we
+        * must use ORDER_CALLER for call chain resolution, in order to process
+        * the callchain starting with the root node and ending with the leaf.
+        */
+       callchain_param.order = ORDER_CALLER;
+       err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+                                       sample, NULL, NULL,
+                                       sysctl_perf_event_max_stack);
+       if (err) {
+               callchain_param.order = saved_order;
+               return NULL;
+       }
+       callchain_cursor_commit(&callchain_cursor);
+
+       while (1) {
+               struct callchain_cursor_node *node;
+               struct addr_location al;
+               u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+               memset(&al, 0, sizeof(al));
+
+               node = callchain_cursor_current(&callchain_cursor);
+               if (!node)
+                       break;
+               /*
+                * Handle export of symbol and dso for this node by
+                * constructing an addr_location struct and then passing it to
+                * db_ids_from_al() to perform the export.
+                */
+               al.sym = node->sym;
+               al.map = node->map;
+               al.machine = machine;
+               if (al.map)
+                       al.addr = al.map->map_ip(al.map, node->ip);
+               else
+                       al.addr = node->ip;
+
+               db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+               /* add node to the call path tree if it doesn't exist */
+               current = call_path__findnew(dbe->cpr, current,
+                                            al.sym, node->ip,
+                                            kernel_start);
+               /*
+                * Stop on allocation failure: passing a NULL parent to the
+                * next lookup would create nodes detached from the tree and
+                * yield a truncated call path.
+                */
+               if (!current)
+                       break;
+
+               callchain_cursor_advance(&callchain_cursor);
+       }
+
+       /* Reset the callchain order to its prior value. */
+       callchain_param.order = saved_order;
+
+       if (current == &dbe->cpr->call_path) {
+               /* Bail because the callchain was empty. */
+               return NULL;
+       }
+
+       return current;
+}
+
 int db_export__branch_type(struct db_export *dbe, u32 branch_type,
                           const char *name)
 {
@@ -329,6 +404,16 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
        if (err)
                goto out_put;
 
+       if (dbe->cpr) {
+               struct call_path *cp = call_path_from_sample(dbe, al->machine,
+                                                            thread, sample,
+                                                            evsel);
+               if (cp) {
+                       db_export__call_path(dbe, cp);
+                       es.call_path_id = cp->db_id;
+               }
+       }
+
        if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
            sample_addr_correlates_sym(&evsel->attr)) {
                struct addr_location addr_al;
index 25e22fd76aca1537813b9278db205993ca45075b..67bc6b8ad2d6ecc42519e980a66e78e71da108ab 100644 (file)
@@ -27,6 +27,7 @@ struct dso;
 struct perf_sample;
 struct addr_location;
 struct call_return_processor;
+struct call_path_root;
 struct call_path;
 struct call_return;
 
@@ -43,6 +44,7 @@ struct export_sample {
        u64                     addr_dso_db_id;
        u64                     addr_sym_db_id;
        u64                     addr_offset; /* addr offset from symbol start */
+       u64                     call_path_id;
 };
 
 struct db_export {
@@ -64,6 +66,7 @@ struct db_export {
        int (*export_call_return)(struct db_export *dbe,
                                  struct call_return *cr);
        struct call_return_processor *crp;
+       struct call_path_root *cpr;
        u64 evsel_last_db_id;
        u64 machine_last_db_id;
        u64 thread_last_db_id;
index 577e600c8eb15a66cb50580b0c6361060147894a..aea189b41cc8c43f8ce325c4fde2f8ae27507df3 100644 (file)
@@ -959,6 +959,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
        return 0;
 }
 
+#ifdef HAVE_DWARF_GETLOCATIONS
 /**
  * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
  * @sp_die: a subprogram DIE
@@ -1080,3 +1081,11 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
 
        return ret;
 }
+#else
+/*
+ * Fallback definition compiled when HAVE_DWARF_GETLOCATIONS is not defined:
+ * every argument is ignored and -ENOTSUP is always returned.
+ */
+int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+                     Dwarf_Die *vr_die __maybe_unused,
+                     struct strbuf *buf __maybe_unused)
+{
+       return -ENOTSUP;
+}
+#endif
index dad55d04ffdd5074c212fac7dbcd306444f008e6..f6fcc68329499f255ddc1a5012dd36fdc04d8024 100644 (file)
@@ -45,6 +45,7 @@ static const char *perf_event__names[] = {
        [PERF_RECORD_STAT]                      = "STAT",
        [PERF_RECORD_STAT_ROUND]                = "STAT_ROUND",
        [PERF_RECORD_EVENT_UPDATE]              = "EVENT_UPDATE",
+       [PERF_RECORD_TIME_CONV]                 = "TIME_CONV",
 };
 
 const char *perf_event__name(unsigned int id)
@@ -433,7 +434,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 {
        char filename[PATH_MAX];
        DIR *tasks;
-       struct dirent dirent, *next;
+       struct dirent *dirent;
        pid_t tgid, ppid;
        int rc = 0;
 
@@ -462,11 +463,11 @@ static int __event__synthesize_thread(union perf_event *comm_event,
                return 0;
        }
 
-       while (!readdir_r(tasks, &dirent, &next) && next) {
+       while ((dirent = readdir(tasks)) != NULL) {
                char *end;
                pid_t _pid;
 
-               _pid = strtol(dirent.d_name, &end, 10);
+               _pid = strtol(dirent->d_name, &end, 10);
                if (*end)
                        continue;
 
@@ -575,7 +576,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
 {
        DIR *proc;
        char proc_path[PATH_MAX];
-       struct dirent dirent, *next;
+       struct dirent *dirent;
        union perf_event *comm_event, *mmap_event, *fork_event;
        int err = -1;
 
@@ -600,9 +601,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
        if (proc == NULL)
                goto out_free_fork;
 
-       while (!readdir_r(proc, &dirent, &next) && next) {
+       while ((dirent = readdir(proc)) != NULL) {
                char *end;
-               pid_t pid = strtol(dirent.d_name, &end, 10);
+               pid_t pid = strtol(dirent->d_name, &end, 10);
 
                if (*end) /* only interested in proper numerical dirents */
                        continue;
index 6bb1c928350d414f8cc157c1c1a608ab4ac6c8ee..8d363d5e65a2e14c019fd18a6129ceef7b1538c3 100644 (file)
@@ -233,6 +233,7 @@ enum perf_user_event_type { /* above any possible kernel type */
        PERF_RECORD_STAT                        = 76,
        PERF_RECORD_STAT_ROUND                  = 77,
        PERF_RECORD_EVENT_UPDATE                = 78,
+       PERF_RECORD_TIME_CONV                   = 79,
        PERF_RECORD_HEADER_MAX
 };
 
@@ -469,6 +470,13 @@ struct stat_round_event {
        u64                             time;
 };
 
+/*
+ * Event record made of the common perf event header followed by three
+ * 64-bit values.
+ * NOTE(review): presumably the layout of a PERF_RECORD_TIME_CONV record,
+ * carrying clock conversion parameters (shift, multiplier, zero point) --
+ * confirm against the code that produces and consumes it.
+ */
+struct time_conv_event {
+       struct perf_event_header header;
+       u64 time_shift;
+       u64 time_mult;
+       u64 time_zero;
+};
+
 union perf_event {
        struct perf_event_header        header;
        struct mmap_event               mmap;
@@ -497,6 +505,7 @@ union perf_event {
        struct stat_config_event        stat_config;
        struct stat_event               stat;
        struct stat_round_event         stat_round;
+       struct time_conv_event          time_conv;
 };
 
 void perf_event__print_totals(void);
index 86a03836a83fc3f8ee8648d83317b8d91e3f48d8..17cd01421e7f85f583f62d6421d50c8a127950f4 100644 (file)
@@ -679,53 +679,52 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
        return NULL;
 }
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+/* When check_messup is true, 'end' must point to a good entry */
+static union perf_event *
+perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
+               u64 end, u64 *prev)
 {
-       struct perf_mmap *md = &evlist->mmap[idx];
-       u64 head;
-       u64 old = md->prev;
        unsigned char *data = md->base + page_size;
        union perf_event *event = NULL;
+       int diff = end - start;
 
-       /*
-        * Check if event was unmapped due to a POLLHUP/POLLERR.
-        */
-       if (!atomic_read(&md->refcnt))
-               return NULL;
-
-       head = perf_mmap__read_head(md);
-       if (evlist->overwrite) {
+       if (check_messup) {
                /*
                 * If we're further behind than half the buffer, there's a chance
                 * the writer will bite our tail and mess up the samples under us.
                 *
-                * If we somehow ended up ahead of the head, we got messed up.
+                * If we somehow ended up ahead of the 'end', we got messed up.
                 *
-                * In either case, truncate and restart at head.
+                * In either case, truncate and restart at 'end'.
                 */
-               int diff = head - old;
                if (diff > md->mask / 2 || diff < 0) {
                        fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
 
                        /*
-                        * head points to a known good entry, start there.
+                        * 'end' points to a known good entry, start there.
                         */
-                       old = head;
+                       start = end;
+                       diff = 0;
                }
        }
 
-       if (old != head) {
+       if (diff >= (int)sizeof(event->header)) {
                size_t size;
 
-               event = (union perf_event *)&data[old & md->mask];
+               event = (union perf_event *)&data[start & md->mask];
                size = event->header.size;
 
+               if (size < sizeof(event->header) || diff < (int)size) {
+                       event = NULL;
+                       goto broken_event;
+               }
+
                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
-               if ((old & md->mask) + size != ((old + size) & md->mask)) {
-                       unsigned int offset = old;
+               if ((start & md->mask) + size != ((start + size) & md->mask)) {
+                       unsigned int offset = start;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = md->event_copy;
 
@@ -740,14 +739,33 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
                        event = (union perf_event *) md->event_copy;
                }
 
-               old += size;
+               start += size;
        }
 
-       md->prev = old;
+broken_event:
+       if (prev)
+               *prev = start;
 
        return event;
 }
 
+/*
+ * Fetch the next event from the ring buffer of mmap @idx of @evlist.
+ *
+ * Returns NULL when the mmap's refcount is zero. Otherwise returns whatever
+ * perf_mmap__read() yields for the range from the saved read position up to
+ * the current head; the saved position is passed by address so that
+ * perf_mmap__read() can store the new one.
+ */
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+       struct perf_mmap *map = &evlist->mmap[idx];
+       u64 start = map->prev;
+       u64 end;
+
+       /* Check if event was unmapped due to a POLLHUP/POLLERR. */
+       if (atomic_read(&map->refcnt) == 0)
+               return NULL;
+
+       end = perf_mmap__read_head(map);
+
+       return perf_mmap__read(map, evlist->overwrite, start, end, &map->prev);
+}
+
 static bool perf_mmap__empty(struct perf_mmap *md)
 {
        return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
@@ -986,26 +1004,34 @@ out_unmap:
        return -1;
 }
 
-static size_t perf_evlist__mmap_size(unsigned long pages)
+unsigned long perf_event_mlock_kb_in_pages(void)
 {
-       if (pages == UINT_MAX) {
-               int max;
+       unsigned long pages;
+       int max;
 
-               if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
-                       /*
-                        * Pick a once upon a time good value, i.e. things look
-                        * strange since we can't read a sysctl value, but lets not
-                        * die yet...
-                        */
-                       max = 512;
-               } else {
-                       max -= (page_size / 1024);
-               }
+       if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+               /*
+                * Pick a once upon a time good value, i.e. things look
+                * strange since we can't read a sysctl value, but lets not
+                * die yet...
+                */
+               max = 512;
+       } else {
+               max -= (page_size / 1024);
+       }
+
+       pages = (max * 1024) / page_size;
+       if (!is_power_of_2(pages))
+               pages = rounddown_pow_of_two(pages);
+
+       return pages;
+}
 
-               pages = (max * 1024) / page_size;
-               if (!is_power_of_2(pages))
-                       pages = rounddown_pow_of_two(pages);
-       } else if (!is_power_of_2(pages))
+static size_t perf_evlist__mmap_size(unsigned long pages)
+{
+       if (pages == UINT_MAX)
+               pages = perf_event_mlock_kb_in_pages();
+       else if (!is_power_of_2(pages))
                return 0;
 
        return (pages + 1) * page_size;
@@ -1192,6 +1218,24 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
        perf_evlist__propagate_maps(evlist);
 }
 
+/*
+ * Apply __perf_evsel__set_sample_bit() with @bit to every evsel visited by
+ * evlist__for_each() on @evlist.
+ */
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+                                  enum perf_event_sample_format bit)
+{
+       struct perf_evsel *pos;
+
+       evlist__for_each(evlist, pos) {
+               __perf_evsel__set_sample_bit(pos, bit);
+       }
+}
+
+/*
+ * Apply __perf_evsel__reset_sample_bit() with @bit to every evsel visited by
+ * evlist__for_each() on @evlist.
+ */
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+                                    enum perf_event_sample_format bit)
+{
+       struct perf_evsel *pos;
+
+       evlist__for_each(evlist, pos) {
+               __perf_evsel__reset_sample_bit(pos, bit);
+       }
+}
+
 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
 {
        struct perf_evsel *evsel;
index a0d15221db6e878412126f1ec5de08cb030f132f..208897a646cae00bd6b279382cbd953a91df0372 100644 (file)
@@ -87,6 +87,17 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist);
 int perf_evlist__add_newtp(struct perf_evlist *evlist,
                           const char *sys, const char *name, void *handler);
 
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+                                  enum perf_event_sample_format bit);
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+                                    enum perf_event_sample_format bit);
+
+#define perf_evlist__set_sample_bit(evlist, bit) \
+       __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+#define perf_evlist__reset_sample_bit(evlist, bit) \
+       __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+
 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
@@ -123,11 +134,14 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 int perf_evlist__open(struct perf_evlist *evlist);
 void perf_evlist__close(struct perf_evlist *evlist);
 
+struct callchain_param;
+
 void perf_evlist__set_id_pos(struct perf_evlist *evlist);
 bool perf_can_sample_identifier(void);
 bool perf_can_record_switch_events(void);
 bool perf_can_record_cpu_wide(void);
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+                        struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
 
 int perf_evlist__prepare_workload(struct perf_evlist *evlist,
@@ -144,6 +158,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
                                  const char *str,
                                  int unset);
 
+unsigned long perf_event_mlock_kb_in_pages(void);
+
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         bool overwrite, unsigned int auxtrace_pages,
                         bool auxtrace_overwrite);
index 738ce226002b8a0e88093fefaf74e9fe6093a13a..3371721a05f2f71a0f4bb0258b81979308bf7936 100644 (file)
@@ -226,7 +226,8 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
                perf_evsel__init(evsel, attr, idx);
 
        if (perf_evsel__is_bpf_output(evsel)) {
-               evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+               evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+                                           PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
                evsel->attr.sample_period = 1;
        }
 
@@ -561,10 +562,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
        return ret;
 }
 
-static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel,
-                            struct record_opts *opts,
-                            struct callchain_param *param)
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+                                 struct record_opts *opts,
+                                 struct callchain_param *param)
 {
        bool function = perf_evsel__is_function_event(evsel);
        struct perf_event_attr *attr = &evsel->attr;
@@ -704,7 +704,7 @@ static void apply_config_terms(struct perf_evsel *evsel,
 
                /* set perf-event callgraph */
                if (param.enabled)
-                       perf_evsel__config_callgraph(evsel, opts, &param);
+                       perf_evsel__config_callchain(evsel, opts, &param);
        }
 }
 
@@ -736,7 +736,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
+void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
+                       struct callchain_param *callchain)
 {
        struct perf_evsel *leader = evsel->leader;
        struct perf_event_attr *attr = &evsel->attr;
@@ -811,8 +812,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
        if (perf_evsel__is_function_event(evsel))
                evsel->attr.exclude_callchain_user = 1;
 
-       if (callchain_param.enabled && !evsel->no_aux_samples)
-               perf_evsel__config_callgraph(evsel, opts, &callchain_param);
+       if (callchain && callchain->enabled && !evsel->no_aux_samples)
+               perf_evsel__config_callchain(evsel, opts, callchain);
 
        if (opts->sample_intr_regs) {
                attr->sample_regs_intr = opts->sample_intr_regs;
@@ -1230,6 +1231,21 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
        __p_bits(buf, size, value, bits);
 }
 
+/*
+ * Render the PERF_SAMPLE_BRANCH_* bits of @value into @buf (at most @size
+ * bytes) by handing a { bit, name } table to __p_bits().
+ */
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+       struct bit_names branch_bits[] = {
+               bit_name(USER),
+               bit_name(KERNEL),
+               bit_name(HV),
+               bit_name(ANY),
+               bit_name(ANY_CALL),
+               bit_name(ANY_RETURN),
+               bit_name(IND_CALL),
+               bit_name(ABORT_TX),
+               bit_name(IN_TX),
+               bit_name(NO_TX),
+               bit_name(COND),
+               bit_name(CALL_STACK),
+               bit_name(IND_JUMP),
+               bit_name(CALL),
+               bit_name(NO_FLAGS),
+               bit_name(NO_CYCLES),
+               /* entry with a NULL name closes the table */
+               { .name = NULL, }
+       };
+#undef bit_name
+
+       __p_bits(buf, size, value, branch_bits);
+}
+
 static void __p_read_format(char *buf, size_t size, u64 value)
 {
 #define bit_name(n) { PERF_FORMAT_##n, #n }
@@ -1248,6 +1264,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
 #define p_unsigned(val)                snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
 #define p_signed(val)          snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
 #define p_sample_type(val)     __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
 #define p_read_format(val)     __p_read_format(buf, BUF_SIZE, val)
 
 #define PRINT_ATTRn(_n, _f, _p)                                \
@@ -1304,7 +1321,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
        PRINT_ATTRf(bp_type, p_unsigned);
        PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
        PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
-       PRINT_ATTRf(branch_sample_type, p_unsigned);
+       PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
        PRINT_ATTRf(sample_regs_user, p_hex);
        PRINT_ATTRf(sample_stack_user, p_unsigned);
        PRINT_ATTRf(clockid, p_signed);
@@ -2253,95 +2270,6 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
        return 0;
 }
 
-static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
-{
-       va_list args;
-       int ret = 0;
-
-       if (!*first) {
-               ret += fprintf(fp, ",");
-       } else {
-               ret += fprintf(fp, ":");
-               *first = false;
-       }
-
-       va_start(args, fmt);
-       ret += vfprintf(fp, fmt, args);
-       va_end(args);
-       return ret;
-}
-
-static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
-{
-       return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
-}
-
-int perf_evsel__fprintf(struct perf_evsel *evsel,
-                       struct perf_attr_details *details, FILE *fp)
-{
-       bool first = true;
-       int printed = 0;
-
-       if (details->event_group) {
-               struct perf_evsel *pos;
-
-               if (!perf_evsel__is_group_leader(evsel))
-                       return 0;
-
-               if (evsel->nr_members > 1)
-                       printed += fprintf(fp, "%s{", evsel->group_name ?: "");
-
-               printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-               for_each_group_member(pos, evsel)
-                       printed += fprintf(fp, ",%s", perf_evsel__name(pos));
-
-               if (evsel->nr_members > 1)
-                       printed += fprintf(fp, "}");
-               goto out;
-       }
-
-       printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-
-       if (details->verbose) {
-               printed += perf_event_attr__fprintf(fp, &evsel->attr,
-                                                   __print_attr__fprintf, &first);
-       } else if (details->freq) {
-               const char *term = "sample_freq";
-
-               if (!evsel->attr.freq)
-                       term = "sample_period";
-
-               printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
-                                        term, (u64)evsel->attr.sample_freq);
-       }
-
-       if (details->trace_fields) {
-               struct format_field *field;
-
-               if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
-                       printed += comma_fprintf(fp, &first, " (not a tracepoint)");
-                       goto out;
-               }
-
-               field = evsel->tp_format->format.fields;
-               if (field == NULL) {
-                       printed += comma_fprintf(fp, &first, " (no trace field)");
-                       goto out;
-               }
-
-               printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
-
-               field = field->next;
-               while (field) {
-                       printed += comma_fprintf(fp, &first, "%s", field->name);
-                       field = field->next;
-               }
-       }
-out:
-       fputc('\n', fp);
-       return ++printed;
-}
-
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
                          char *msg, size_t msgsize)
 {
@@ -2397,10 +2325,18 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
                         "Probably the maximum number of open file descriptors has been reached.\n"
                         "Hint: Try again after reducing the number of events.\n"
                         "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+       case ENOMEM:
+               if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
+                   access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+                       return scnprintf(msg, size,
+                                        "Not enough memory to setup event with callchain.\n"
+                                        "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+                                        "Hint: Current value: %d", sysctl_perf_event_max_stack);
+               break;
        case ENODEV:
                if (target->cpu_list)
                        return scnprintf(msg, size, "%s",
-        "No such device - did you specify an out-of-range profile CPU?\n");
+        "No such device - did you specify an out-of-range profile CPU?");
                break;
        case EOPNOTSUPP:
                if (evsel->attr.precise_ip)
@@ -2432,7 +2368,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
        return scnprintf(msg, size,
        "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
        "/bin/dmesg may provide additional information.\n"
-       "No CONFIG_PERF_EVENTS=y kernel support configured?\n",
+       "No CONFIG_PERF_EVENTS=y kernel support configured?",
                         err, strerror_r(err, sbuf, sizeof(sbuf)),
                         perf_evsel__name(evsel));
 }
index 501ea6e565f13a4a4817947957c79f15d805d130..8a644fef452c07356530824f1f3af151c797fcd3 100644 (file)
@@ -178,8 +178,14 @@ void perf_evsel__init(struct perf_evsel *evsel,
 void perf_evsel__exit(struct perf_evsel *evsel);
 void perf_evsel__delete(struct perf_evsel *evsel);
 
+struct callchain_param;
+
 void perf_evsel__config(struct perf_evsel *evsel,
-                       struct record_opts *opts);
+                       struct record_opts *opts,
+                       struct callchain_param *callchain);
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+                                 struct record_opts *opts,
+                                 struct callchain_param *callchain);
 
 int __perf_evsel__sample_size(u64 sample_type);
 void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
@@ -381,6 +387,24 @@ struct perf_attr_details {
 int perf_evsel__fprintf(struct perf_evsel *evsel,
                        struct perf_attr_details *details, FILE *fp);
 
+#define EVSEL__PRINT_IP                        (1<<0)
+#define EVSEL__PRINT_SYM               (1<<1)
+#define EVSEL__PRINT_DSO               (1<<2)
+#define EVSEL__PRINT_SYMOFFSET         (1<<3)
+#define EVSEL__PRINT_ONELINE           (1<<4)
+#define EVSEL__PRINT_SRCLINE           (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR   (1<<6)
+
+struct callchain_cursor;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+                             unsigned int print_opts,
+                             struct callchain_cursor *cursor, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+                       int left_alignment, unsigned int print_opts,
+                       struct callchain_cursor *cursor, FILE *fp);
+
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
                          char *msg, size_t msgsize);
 int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
@@ -396,7 +420,7 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node);  \
      (_evsel) && (_evsel)->leader == (_leader);                                        \
      (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
 
-static inline bool has_branch_callstack(struct perf_evsel *evsel)
+static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
 {
        return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
 }
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644 (file)
index 0000000..3674e77
--- /dev/null
@@ -0,0 +1,212 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "callchain.h"
+#include "map.h"
+#include "symbol.h"
+
+/*
+ * Print a separator followed by the printf-style formatted text to @fp.
+ *
+ * The separator is ":" while *@first is true (which is then cleared) and ","
+ * on every later call. Returns the sum of the fprintf()/vfprintf() return
+ * values.
+ */
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+       va_list ap;
+       int printed;
+
+       if (*first) {
+               printed = fprintf(fp, ":");
+               *first = false;
+       } else {
+               printed = fprintf(fp, ",");
+       }
+
+       va_start(ap, fmt);
+       printed += vfprintf(fp, fmt, ap);
+       va_end(ap);
+
+       return printed;
+}
+
+/*
+ * Attribute printing callback: emits " <name>: <val>" through
+ * comma_fprintf(), treating @priv as the bool "first" flag.
+ */
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val,
+                                void *priv)
+{
+       bool *first = priv;
+
+       return comma_fprintf(fp, first, " %s: %s", name, val);
+}
+
+/*
+ * Print a one-line description of @evsel to @fp, terminated by a newline.
+ *
+ * With details->event_group set, only group leaders are printed (others
+ * return 0 with no output): the leader name followed by the member names,
+ * wrapped in "<group_name>{...}" when the group has more than one member.
+ *
+ * Otherwise the evsel name is printed, followed by either all attributes
+ * (details->verbose) or the sample frequency/period (details->freq; the
+ * value is always read from attr.sample_freq), and then, when
+ * details->trace_fields is set, the tracepoint field names.
+ *
+ * Returns the accumulated character count plus one for the newline.
+ */
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+                       struct perf_attr_details *details, FILE *fp)
+{
+       bool first = true;
+       int printed = 0;
+
+       if (details->event_group) {
+               struct perf_evsel *member;
+               bool braces = evsel->nr_members > 1;
+
+               if (!perf_evsel__is_group_leader(evsel))
+                       return 0;
+
+               if (braces)
+                       printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+               printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+               for_each_group_member(member, evsel)
+                       printed += fprintf(fp, ",%s", perf_evsel__name(member));
+
+               if (braces)
+                       printed += fprintf(fp, "}");
+               goto out;
+       }
+
+       printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+       if (details->verbose) {
+               printed += perf_event_attr__fprintf(fp, &evsel->attr,
+                                                   __print_attr__fprintf, &first);
+       } else if (details->freq) {
+               const char *term;
+
+               term = evsel->attr.freq ? "sample_freq" : "sample_period";
+               printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+                                        term, (u64)evsel->attr.sample_freq);
+       }
+
+       if (details->trace_fields) {
+               struct format_field *field;
+
+               if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+                       printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+                       goto out;
+               }
+
+               field = evsel->tp_format->format.fields;
+               if (!field) {
+                       printed += comma_fprintf(fp, &first, " (no trace field)");
+                       goto out;
+               }
+
+               /* The first field carries the label, the rest only the name. */
+               printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+               for (field = field->next; field; field = field->next)
+                       printed += comma_fprintf(fp, &first, "%s", field->name);
+       }
+out:
+       fputc('\n', fp);
+       return printed + 1;
+}
+
+/*
+ * Print the entries of @cursor to @fp, one per callchain node.
+ *
+ * Nothing is printed (and @cursor is not touched) when @sample has no
+ * callchain. Nodes whose symbol has 'ignore' set are skipped. For every
+ * other node, @left_alignment blanks are written, followed by the parts
+ * selected by the EVSEL__PRINT_* bits in @print_opts: ip, symbol (optionally
+ * with offset), dso name in parentheses and source line. Each node ends
+ * with a newline unless EVSEL__PRINT_ONELINE is set, in which case the ip
+ * is preceded by a space instead of a tab.
+ *
+ * Returns the sum of the return values of the print calls.
+ */
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+                             unsigned int print_opts, struct callchain_cursor *cursor,
+                             FILE *fp)
+{
+       const char sep = (print_opts & EVSEL__PRINT_ONELINE) ? ' ' : '\t';
+       int unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+       struct callchain_cursor_node *node;
+       struct addr_location node_al;
+       int printed = 0;
+
+       if (!sample->callchain)
+               return printed;
+
+       callchain_cursor_commit(cursor);
+
+       for (node = callchain_cursor_current(cursor); node != NULL;
+            callchain_cursor_advance(cursor),
+            node = callchain_cursor_current(cursor)) {
+               u64 addr = 0;
+
+               if (node->sym && node->sym->ignore)
+                       continue;
+
+               printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+               if (print_opts & EVSEL__PRINT_IP)
+                       printed += fprintf(fp, "%c%16" PRIx64, sep, node->ip);
+
+               /* addr stays 0 when the node has no map to translate the ip. */
+               if (node->map)
+                       addr = node->map->map_ip(node->map, node->ip);
+
+               if (print_opts & EVSEL__PRINT_SYM) {
+                       printed += fprintf(fp, " ");
+                       node_al.addr = addr;
+                       node_al.map  = node->map;
+
+                       if (print_opts & EVSEL__PRINT_SYMOFFSET)
+                               printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
+                                                                         unknown_as_addr, fp);
+                       else
+                               printed += __symbol__fprintf_symname(node->sym, &node_al,
+                                                                    unknown_as_addr, fp);
+               }
+
+               if (print_opts & EVSEL__PRINT_DSO) {
+                       printed += fprintf(fp, " (");
+                       printed += map__fprintf_dsoname(node->map, fp);
+                       printed += fprintf(fp, ")");
+               }
+
+               if (print_opts & EVSEL__PRINT_SRCLINE)
+                       printed += map__fprintf_srcline(node->map, addr, "\n  ", fp);
+
+               if (!(print_opts & EVSEL__PRINT_ONELINE))
+                       printed += fprintf(fp, "\n");
+       }
+
+       return printed;
+}
+
+/*
+ * Print the location of @sample to @fp.
+ *
+ * With a non-NULL @cursor the work is delegated to
+ * sample__fprintf_callchain(). Otherwise, unless @al's symbol has 'ignore'
+ * set, @left_alignment blanks are written followed by the parts selected by
+ * the EVSEL__PRINT_* bits in @print_opts: sample ip, symbol (optionally with
+ * offset), dso name in parentheses and source line. No trailing newline is
+ * added on this path.
+ *
+ * Returns the sum of the return values of the print calls.
+ */
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+                       int left_alignment, unsigned int print_opts,
+                       struct callchain_cursor *cursor, FILE *fp)
+{
+       int unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+       int printed;
+
+       if (cursor != NULL)
+               return sample__fprintf_callchain(sample, left_alignment,
+                                                print_opts, cursor, fp);
+
+       if (al->sym && al->sym->ignore)
+               return 0;
+
+       printed = fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+       if (print_opts & EVSEL__PRINT_IP)
+               printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+       if (print_opts & EVSEL__PRINT_SYM) {
+               printed += fprintf(fp, " ");
+
+               if (print_opts & EVSEL__PRINT_SYMOFFSET)
+                       printed += __symbol__fprintf_symname_offs(al->sym, al,
+                                                                 unknown_as_addr, fp);
+               else
+                       printed += __symbol__fprintf_symname(al->sym, al,
+                                                            unknown_as_addr, fp);
+       }
+
+       if (print_opts & EVSEL__PRINT_DSO) {
+               printed += fprintf(fp, " (");
+               printed += map__fprintf_dsoname(al->map, fp);
+               printed += fprintf(fp, ")");
+       }
+
+       if (print_opts & EVSEL__PRINT_SRCLINE)
+               printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
+
+       return printed;
+}
index 31c4641fe5ff06641588bef5e4448641685e49f5..cfab531437c743c4c849d40fd45545fcb2fbcddc 100644 (file)
@@ -295,7 +295,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
                root_in  = &he->parent_he->hroot_in;
                root_out = &he->parent_he->hroot_out;
        } else {
-               if (sort__need_collapse)
+               if (hists__has(hists, need_collapse))
                        root_in = &hists->entries_collapsed;
                else
                        root_in = hists->entries_in;
@@ -953,7 +953,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 {
        int err, err2;
 
-       err = sample__resolve_callchain(iter->sample, &iter->parent,
+       err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
                                        iter->evsel, al, max_stack_depth);
        if (err)
                return err;
@@ -1295,8 +1295,9 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
        return ret;
 }
 
-int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
-                                struct hist_entry *he)
+static int hists__collapse_insert_entry(struct hists *hists,
+                                       struct rb_root *root,
+                                       struct hist_entry *he)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
@@ -1372,7 +1373,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
        struct hist_entry *n;
        int ret;
 
-       if (!sort__need_collapse)
+       if (!hists__has(hists, need_collapse))
                return 0;
 
        hists->nr_entries = 0;
@@ -1631,7 +1632,7 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
                return;
        }
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
@@ -2035,7 +2036,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
        struct hist_entry *he;
        int64_t cmp;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                root = &hists->entries_collapsed;
        else
                root = hists->entries_in;
@@ -2061,6 +2062,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
        if (he) {
                memset(&he->stat, 0, sizeof(he->stat));
                he->hists = hists;
+               if (symbol_conf.cumulate_callchain)
+                       memset(he->stat_acc, 0, sizeof(he->stat));
                rb_link_node(&he->rb_node_in, parent, p);
                rb_insert_color(&he->rb_node_in, root);
                hists__inc_stats(hists, he);
@@ -2075,7 +2078,7 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
 {
        struct rb_node *n;
 
-       if (sort__need_collapse)
+       if (hists__has(hists, need_collapse))
                n = hists->entries_collapsed.rb_node;
        else
                n = hists->entries_in->rb_node;
@@ -2104,7 +2107,7 @@ void hists__match(struct hists *leader, struct hists *other)
        struct rb_node *nd;
        struct hist_entry *pos, *pair;
 
-       if (sort__need_collapse)
+       if (hists__has(leader, need_collapse))
                root = &leader->entries_collapsed;
        else
                root = leader->entries_in;
@@ -2129,7 +2132,7 @@ int hists__link(struct hists *leader, struct hists *other)
        struct rb_node *nd;
        struct hist_entry *pos, *pair;
 
-       if (sort__need_collapse)
+       if (hists__has(other, need_collapse))
                root = &other->entries_collapsed;
        else
                root = other->entries_in;
index bec0cd660fbd60d74cebaa6be0b5335871979c47..0f84bfb42bb1378c3b21aabee90ed9cd5e44c02f 100644 (file)
@@ -82,6 +82,8 @@ struct hists {
        int                     nr_hpp_node;
 };
 
+#define hists__has(__h, __f) ((__h)->hpp_list->__f) /* flag __f of __h's hpp_list */
+
 struct hist_entry_iter;
 
 struct hist_iter_ops {
@@ -199,8 +201,6 @@ int hists__init(void);
 int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-int hists__collapse_insert_entry(struct hists *hists,
-                                 struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
        char *buf;
@@ -240,6 +240,14 @@ struct perf_hpp_fmt {
 struct perf_hpp_list {
        struct list_head fields;
        struct list_head sorts;
+
+       int need_collapse;      /* non-zero: hists walk entries_collapsed, not entries_in */
+       int parent;             /* gates parent_regex matching of callchain symbols */
+       int sym;
+       int dso;
+       int socket;
+       int thread;
+       int comm;
 };
 
 extern struct perf_hpp_list perf_hpp_list;
index abf1366e2a24d3bcf439434f96075320000e49a9..9df99608556332289b499622f11a5a402e6b875a 100644 (file)
@@ -66,6 +66,7 @@ struct intel_bts {
        u64                             branches_id;
        size_t                          branches_event_size;
        bool                            synth_needs_swap;
+       unsigned long                   num_events;
 };
 
 struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
        union perf_event event;
        struct perf_sample sample = { .ip = 0, };
 
+       if (bts->synth_opts.initial_skip &&
+           bts->num_events++ <= bts->synth_opts.initial_skip)
+               return 0;
+
        event.sample.header.type = PERF_RECORD_SAMPLE;
        event.sample.header.misc = PERF_RECORD_MISC_USER;
        event.sample.header.size = sizeof(struct perf_event_header);
index 9409d014b46c713de02df828cf9ad7ff726881e8..9c8f15da86ce8ad8f735815cb3c0f55ca10d7167 100644 (file)
@@ -356,7 +356,7 @@ static const char *intel_pt_err_msgs[] = {
 
 int intel_pt__strerror(int code, char *buf, size_t buflen)
 {
-       if (code < 1 || code > INTEL_PT_ERR_MAX)
+       if (code < 1 || code >= INTEL_PT_ERR_MAX)
                code = INTEL_PT_ERR_UNK;
        strlcpy(buf, intel_pt_err_msgs[code], buflen);
        return 0;
index 6175784409896425154dcfd9c49560e9f2cb7e28..137196990012727781728b080cb5c4b799e3332d 100644 (file)
@@ -100,6 +100,8 @@ struct intel_pt {
        u64 cyc_bit;
        u64 noretcomp_bit;
        unsigned max_non_turbo_ratio;
+
+       unsigned long num_events;
 };
 
 enum switch_state {
@@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
                return 0;
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1029,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1087,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
        union perf_event *event = ptq->event_buf;
        struct perf_sample sample = { .ip = 0, };
 
+       if (pt->synth_opts.initial_skip &&
+           pt->num_events++ < pt->synth_opts.initial_skip)
+               return 0;
+
        event->sample.header.type = PERF_RECORD_SAMPLE;
        event->sample.header.misc = PERF_RECORD_MISC_USER;
        event->sample.header.size = sizeof(struct perf_event_header);
@@ -1199,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
        ptq->have_sample = false;
 
        if (pt->sample_instructions &&
-           (state->type & INTEL_PT_INSTRUCTION)) {
+           (state->type & INTEL_PT_INSTRUCTION) &&
+           (!pt->synth_opts.initial_skip ||
+            pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_instruction_sample(ptq);
                if (err)
                        return err;
        }
 
        if (pt->sample_transactions &&
-           (state->type & INTEL_PT_TRANSACTION)) {
+           (state->type & INTEL_PT_TRANSACTION) &&
+           (!pt->synth_opts.initial_skip ||
+            pt->num_events++ >= pt->synth_opts.initial_skip)) {
                err = intel_pt_synth_transaction_sample(ptq);
                if (err)
                        return err;
index ad0c0bb1fbc78f5d131a8b831ecaf2e323ce0150..86afe9618bb0d917306ff3912203f81b6fee40b4 100644 (file)
@@ -17,6 +17,7 @@
 #include "strlist.h"
 #include <elf.h>
 
+#include "tsc.h"
 #include "session.h"
 #include "jit.h"
 #include "jitdump.h"
@@ -33,6 +34,7 @@ struct jit_buf_desc {
        size_t           bufsize;
        FILE             *in;
        bool             needs_bswap; /* handles cross-endianess */
+       bool             use_arch_timestamp;
        void             *debug_data;
        size_t           nr_debug_entries;
        uint32_t         code_load_count;
@@ -158,13 +160,16 @@ jit_open(struct jit_buf_desc *jd, const char *name)
                header.flags      = bswap_64(header.flags);
        }
 
+       jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
        if (verbose > 2)
-               pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+               pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
                        header.version,
                        header.total_size,
                        (unsigned long long)header.timestamp,
                        header.pid,
-                       header.elf_mach);
+                       header.elf_mach,
+                       jd->use_arch_timestamp);
 
        if (header.flags & JITDUMP_FLAGS_RESERVED) {
                pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
@@ -172,10 +177,15 @@ jit_open(struct jit_buf_desc *jd, const char *name)
                goto error;
        }
 
+       if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+               pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+               goto error;
+       }
+
        /*
         * validate event is using the correct clockid
         */
-       if (jit_validate_events(jd->session)) {
+       if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
                pr_err("error, jitted code must be sampled with perf record -k 1\n");
                goto error;
        }
@@ -329,6 +339,23 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
        return 0;
 }
 
+/* Convert an arch-timestamp jitdump time to perf time; else pass it through. */
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+       struct perf_tsc_conversion tc;
+
+       if (!jd->use_arch_timestamp)
+               return timestamp;
+
+       if (!jd->session->time_conv.time_mult)
+               return 0;
+
+       tc.time_shift = jd->session->time_conv.time_shift;
+       tc.time_mult  = jd->session->time_conv.time_mult;
+       tc.time_zero  = jd->session->time_conv.time_zero;
+       return tsc_to_perf_time(timestamp, &tc);
+}
+
 static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
 {
        struct perf_sample sample;
@@ -385,7 +412,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
                return -1;
        }
        if (stat(filename, &st))
-               memset(&st, 0, sizeof(stat));
+               memset(&st, 0, sizeof(st));
 
        event->mmap2.header.type = PERF_RECORD_MMAP2;
        event->mmap2.header.misc = PERF_RECORD_MISC_USER;
@@ -410,7 +437,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
                id->tid  = tid;
        }
        if (jd->sample_type & PERF_SAMPLE_TIME)
-               id->time = jr->load.p.timestamp;
+               id->time = convert_timestamp(jd, jr->load.p.timestamp);
 
        /*
         * create pseudo sample to induce dso hit increment
@@ -473,7 +500,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
        size++; /* for \0 */
 
        if (stat(filename, &st))
-               memset(&st, 0, sizeof(stat));
+               memset(&st, 0, sizeof(st));
 
        size = PERF_ALIGN(size, sizeof(u64));
 
@@ -499,7 +526,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
                id->tid  = tid;
        }
        if (jd->sample_type & PERF_SAMPLE_TIME)
-               id->time = jr->load.p.timestamp;
+               id->time = convert_timestamp(jd, jr->load.p.timestamp);
 
        /*
         * create pseudo sample to induce dso hit increment
index b66c1f503d9edbdd939d8f80b720e128abc15620..bcacd20d0c1c709ed3f983c4792393d851fa47d0 100644 (file)
 #define JITHEADER_VERSION 1
 
 enum jitdump_flags_bits {
+       JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
        JITDUMP_FLAGS_MAX_BIT,
 };
 
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP   (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
 #define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
                                (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
 
index 80b9b6a87990b29e4722a956e62814130c1290de..639a2903065eec287bce323a08c59a30594cee36 100644 (file)
@@ -32,6 +32,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 
        machine->threads = RB_ROOT;
        pthread_rwlock_init(&machine->threads_lock, NULL);
+       machine->nr_threads = 0;
        INIT_LIST_HEAD(&machine->dead_threads);
        machine->last_match = NULL;
 
@@ -430,6 +431,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
                 */
                thread__get(th);
                machine->last_match = th;
+               ++machine->nr_threads;
        }
 
        return th;
@@ -681,11 +683,13 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
 
 size_t machine__fprintf(struct machine *machine, FILE *fp)
 {
-       size_t ret = 0;
+       size_t ret;
        struct rb_node *nd;
 
        pthread_rwlock_rdlock(&machine->threads_lock);
 
+       ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+
        for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
                struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
@@ -908,11 +912,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
        return machine__create_kernel_maps(machine);
 }
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-                          enum map_type type, symbol_filter_t filter)
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+                            enum map_type type, bool no_kcore, symbol_filter_t filter)
 {
        struct map *map = machine__kernel_map(machine);
-       int ret = dso__load_kallsyms(map->dso, filename, map, filter);
+       int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter);
 
        if (ret > 0) {
                dso__set_loaded(map->dso, type);
@@ -927,6 +931,12 @@ int machine__load_kallsyms(struct machine *machine, const char *filename,
        return ret;
 }
 
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+                          enum map_type type, symbol_filter_t filter)
+{
+       return __machine__load_kallsyms(machine, filename, type, false, filter);
+}
+
 int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
                               symbol_filter_t filter)
 {
@@ -1413,6 +1423,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
                pthread_rwlock_wrlock(&machine->threads_lock);
        rb_erase_init(&th->rb_node, &machine->threads);
        RB_CLEAR_NODE(&th->rb_node);
+       --machine->nr_threads;
        /*
         * Move it first to the dead_threads list, then drop the reference,
         * if this is the last reference, then the thread__delete destructor
@@ -1599,6 +1610,7 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 }
 
 static int add_callchain_ip(struct thread *thread,
+                           struct callchain_cursor *cursor,
                            struct symbol **parent,
                            struct addr_location *root_al,
                            u8 *cpumode,
@@ -1630,7 +1642,7 @@ static int add_callchain_ip(struct thread *thread,
                                 * It seems the callchain is corrupted.
                                 * Discard all.
                                 */
-                               callchain_cursor_reset(&callchain_cursor);
+                               callchain_cursor_reset(cursor);
                                return 1;
                        }
                        return 0;
@@ -1640,7 +1652,7 @@ static int add_callchain_ip(struct thread *thread,
        }
 
        if (al.sym != NULL) {
-               if (sort__has_parent && !*parent &&
+               if (perf_hpp_list.parent && !*parent &&
                    symbol__match_regex(al.sym, &parent_regex))
                        *parent = al.sym;
                else if (have_ignore_callees && root_al &&
@@ -1648,13 +1660,13 @@ static int add_callchain_ip(struct thread *thread,
                        /* Treat this symbol as the root,
                           forgetting its callees. */
                        *root_al = al;
-                       callchain_cursor_reset(&callchain_cursor);
+                       callchain_cursor_reset(cursor);
                }
        }
 
        if (symbol_conf.hide_unresolved && al.sym == NULL)
                return 0;
-       return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
+       return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1724,6 +1736,7 @@ static int remove_loops(struct branch_entry *l, int nr)
  * negative error code on other errors.
  */
 static int resolve_lbr_callchain_sample(struct thread *thread,
+                                       struct callchain_cursor *cursor,
                                        struct perf_sample *sample,
                                        struct symbol **parent,
                                        struct addr_location *root_al,
@@ -1756,7 +1769,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                 */
                int mix_chain_nr = i + 1 + lbr_nr + 1;
 
-               if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+               if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
                        pr_warning("corrupted callchain. skipping...\n");
                        return 0;
                }
@@ -1778,7 +1791,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
                                        ip = lbr_stack->entries[0].to;
                        }
 
-                       err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+                       err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
                        if (err)
                                return (err < 0) ? err : 0;
                }
@@ -1789,6 +1802,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 }
 
 static int thread__resolve_callchain_sample(struct thread *thread,
+                                           struct callchain_cursor *cursor,
                                            struct perf_evsel *evsel,
                                            struct perf_sample *sample,
                                            struct symbol **parent,
@@ -1803,10 +1817,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        int skip_idx = -1;
        int first_call = 0;
 
-       callchain_cursor_reset(&callchain_cursor);
-
-       if (has_branch_callstack(evsel)) {
-               err = resolve_lbr_callchain_sample(thread, sample, parent,
+       if (perf_evsel__has_branch_callstack(evsel)) {
+               err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
                                                   root_al, max_stack);
                if (err)
                        return (err < 0) ? err : 0;
@@ -1816,7 +1828,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
         * Based on DWARF debug information, some architectures skip
         * a callchain entry saved by the kernel.
         */
-       if (chain->nr < PERF_MAX_STACK_DEPTH)
+       if (chain->nr < sysctl_perf_event_max_stack)
                skip_idx = arch_skip_callchain_idx(thread, chain);
 
        /*
@@ -1863,10 +1875,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
                nr = remove_loops(be, nr);
 
                for (i = 0; i < nr; i++) {
-                       err = add_callchain_ip(thread, parent, root_al,
+                       err = add_callchain_ip(thread, cursor, parent, root_al,
                                               NULL, be[i].to);
                        if (!err)
-                               err = add_callchain_ip(thread, parent, root_al,
+                               err = add_callchain_ip(thread, cursor, parent, root_al,
                                                       NULL, be[i].from);
                        if (err == -EINVAL)
                                break;
@@ -1877,7 +1889,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
        }
 
 check_calls:
-       if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
+       if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
                pr_warning("corrupted callchain. skipping...\n");
                return 0;
        }
@@ -1896,7 +1908,7 @@ check_calls:
 #endif
                ip = chain->ips[j];
 
-               err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+               err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
 
                if (err)
                        return (err < 0) ? err : 0;
@@ -1915,19 +1927,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
                                       entry->map, entry->sym);
 }
 
-int thread__resolve_callchain(struct thread *thread,
-                             struct perf_evsel *evsel,
-                             struct perf_sample *sample,
-                             struct symbol **parent,
-                             struct addr_location *root_al,
-                             int max_stack)
+static int thread__resolve_callchain_unwind(struct thread *thread,
+                                           struct callchain_cursor *cursor,
+                                           struct perf_evsel *evsel,
+                                           struct perf_sample *sample,
+                                           int max_stack)
 {
-       int ret = thread__resolve_callchain_sample(thread, evsel,
-                                                  sample, parent,
-                                                  root_al, max_stack);
-       if (ret)
-               return ret;
-
        /* Can we do dwarf post unwind? */
        if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
              (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
@@ -1938,9 +1943,45 @@ int thread__resolve_callchain(struct thread *thread,
            (!sample->user_stack.size))
                return 0;
 
-       return unwind__get_entries(unwind_entry, &callchain_cursor,
+       return unwind__get_entries(unwind_entry, cursor,
                                   thread, sample, max_stack);
+}
+
+/* Resolve @sample's callchain into @cursor; non-zero return = a step failed. */
+int thread__resolve_callchain(struct thread *thread,
+                             struct callchain_cursor *cursor,
+                             struct perf_evsel *evsel,
+                             struct perf_sample *sample,
+                             struct symbol **parent,
+                             struct addr_location *root_al,
+                             int max_stack)
+{
+       int ret = 0;
+
+       /* Reset the caller's cursor, which need not be the global one. */
+       callchain_cursor_reset(cursor);
 
+       if (callchain_param.order == ORDER_CALLEE) {
+               ret = thread__resolve_callchain_sample(thread, cursor,
+                                                      evsel, sample, parent,
+                                                      root_al, max_stack);
+               if (ret)
+                       return ret;
+               ret = thread__resolve_callchain_unwind(thread, cursor,
+                                                      evsel, sample,
+                                                      max_stack);
+       } else {
+               ret = thread__resolve_callchain_unwind(thread, cursor,
+                                                      evsel, sample,
+                                                      max_stack);
+               if (ret)
+                       return ret;
+               ret = thread__resolve_callchain_sample(thread, cursor,
+                                                      evsel, sample, parent,
+                                                      root_al, max_stack);
+       }
+
+       return ret;
 }
 
 int machine__for_each_thread(struct machine *machine,
index 8499db2811583000e5181f1fc91f94c5e5f9f418..83f46790c52f7e3008427577c52288ed5f15e86b 100644 (file)
@@ -31,6 +31,7 @@ struct machine {
        char              *root_dir;
        struct rb_root    threads;
        pthread_rwlock_t  threads_lock;
+       unsigned int      nr_threads;
        struct list_head  dead_threads;
        struct thread     *last_match;
        struct vdso_info  *vdso_info;
@@ -141,7 +142,11 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
                                           struct addr_location *al);
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
                                     struct addr_location *al);
+
+struct callchain_cursor;
+
 int thread__resolve_callchain(struct thread *thread,
+                             struct callchain_cursor *cursor,
                              struct perf_evsel *evsel,
                              struct perf_sample *sample,
                              struct symbol **parent,
@@ -211,6 +216,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
 struct map *machine__findnew_module_map(struct machine *machine, u64 start,
                                        const char *filename);
 
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+                            enum map_type type, bool no_kcore, symbol_filter_t filter);
 int machine__load_kallsyms(struct machine *machine, const char *filename,
                           enum map_type type, symbol_filter_t filter);
 int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
index 171b6d10a04b6d9fe4626b6af8725beee924adf0..02c31865648b11aaf6569ce34014ce16f602e9f0 100644 (file)
@@ -431,6 +431,13 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
        if (map->dso->rel)
                return rip - map->pgoff;
 
+       /*
+        * kernel modules also have DSO_TYPE_USER in dso->kernel,
+        * but all kernel modules are ET_REL, so won't get here.
+        */
+       if (map->dso->kernel == DSO_TYPE_USER)
+               return rip + map->dso->text_offset;
+
        return map->unmap_ip(map, rip) - map->reloc;
 }
 
@@ -454,6 +461,13 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
        if (map->dso->rel)
                return map->unmap_ip(map, ip + map->pgoff);
 
+       /*
+        * kernel modules also have DSO_TYPE_USER in dso->kernel,
+        * but all kernel modules are ET_REL, so won't get here.
+        */
+       if (map->dso->kernel == DSO_TYPE_USER)
+               return map->unmap_ip(map, ip - map->dso->text_offset);
+
        return ip + map->reloc;
 }
 
index b1b9e2385f4b3e0f96d9137e4b3278ba0d650800..fe84df1875aa9e231d63d56ba3417f242ea4e3b7 100644 (file)
@@ -308,3 +308,12 @@ void ordered_events__free(struct ordered_events *oe)
                free(event);
        }
 }
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+       ordered_events__deliver_t deliver = oe->deliver; /* outlives the wipe */
+
+       ordered_events__free(oe);
+       memset(oe, 0, sizeof(*oe));
+       ordered_events__init(oe, deliver);
+}
index f403991e3bfd8d7c8a8d892344223316890836be..e11468a9a6e40883f6d0450a64acb9cb4ce6abac 100644 (file)
@@ -49,6 +49,7 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve
 int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
 void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
 void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
 
 static inline
 void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
index 4c19d5e79d8c4d626eb3fa91486cc1d83447aeeb..bcbc983d4b12215dc1045fb0542597f08db38b45 100644 (file)
@@ -138,11 +138,11 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 #define PERF_EVENT_TYPE(config)                __PERF_EVENT_FIELD(config, TYPE)
 #define PERF_EVENT_ID(config)          __PERF_EVENT_FIELD(config, EVENT)
 
-#define for_each_subsystem(sys_dir, sys_dirent, sys_next)             \
-       while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next)        \
-       if (sys_dirent.d_type == DT_DIR &&                                     \
-          (strcmp(sys_dirent.d_name, ".")) &&                                 \
-          (strcmp(sys_dirent.d_name, "..")))
+#define for_each_subsystem(sys_dir, sys_dirent)                        \
+       while ((sys_dirent = readdir(sys_dir)) != NULL)         \
+               if (sys_dirent->d_type == DT_DIR &&             \
+                   (strcmp(sys_dirent->d_name, ".")) &&        \
+                   (strcmp(sys_dirent->d_name, "..")))
 
 static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
 {
@@ -159,12 +159,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
        return 0;
 }
 
-#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next)             \
-       while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next)        \
-       if (evt_dirent.d_type == DT_DIR &&                                     \
-          (strcmp(evt_dirent.d_name, ".")) &&                                 \
-          (strcmp(evt_dirent.d_name, "..")) &&                                \
-          (!tp_event_has_id(&sys_dirent, &evt_dirent)))
+#define for_each_event(sys_dirent, evt_dir, evt_dirent)                \
+       while ((evt_dirent = readdir(evt_dir)) != NULL)         \
+               if (evt_dirent->d_type == DT_DIR &&             \
+                   (strcmp(evt_dirent->d_name, ".")) &&        \
+                   (strcmp(evt_dirent->d_name, "..")) &&       \
+                   (!tp_event_has_id(sys_dirent, evt_dirent)))
 
 #define MAX_EVENT_LENGTH 512
 
@@ -173,7 +173,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 {
        struct tracepoint_path *path = NULL;
        DIR *sys_dir, *evt_dir;
-       struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
+       struct dirent *sys_dirent, *evt_dirent;
        char id_buf[24];
        int fd;
        u64 id;
@@ -184,18 +184,18 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
        if (!sys_dir)
                return NULL;
 
-       for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+       for_each_subsystem(sys_dir, sys_dirent) {
 
                snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-                        sys_dirent.d_name);
+                        sys_dirent->d_name);
                evt_dir = opendir(dir_path);
                if (!evt_dir)
                        continue;
 
-               for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+               for_each_event(sys_dirent, evt_dir, evt_dirent) {
 
                        snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
-                                evt_dirent.d_name);
+                                evt_dirent->d_name);
                        fd = open(evt_path, O_RDONLY);
                        if (fd < 0)
                                continue;
@@ -220,9 +220,9 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
                                        free(path);
                                        return NULL;
                                }
-                               strncpy(path->system, sys_dirent.d_name,
+                               strncpy(path->system, sys_dirent->d_name,
                                        MAX_EVENT_LENGTH);
-                               strncpy(path->name, evt_dirent.d_name,
+                               strncpy(path->name, evt_dirent->d_name,
                                        MAX_EVENT_LENGTH);
                                return path;
                        }
@@ -1812,7 +1812,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
                             bool name_only)
 {
        DIR *sys_dir, *evt_dir;
-       struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
+       struct dirent *sys_dirent, *evt_dirent;
        char evt_path[MAXPATHLEN];
        char dir_path[MAXPATHLEN];
        char **evt_list = NULL;
@@ -1830,20 +1830,20 @@ restart:
                        goto out_close_sys_dir;
        }
 
-       for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+       for_each_subsystem(sys_dir, sys_dirent) {
                if (subsys_glob != NULL &&
-                   !strglobmatch(sys_dirent.d_name, subsys_glob))
+                   !strglobmatch(sys_dirent->d_name, subsys_glob))
                        continue;
 
                snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-                        sys_dirent.d_name);
+                        sys_dirent->d_name);
                evt_dir = opendir(dir_path);
                if (!evt_dir)
                        continue;
 
-               for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+               for_each_event(sys_dirent, evt_dir, evt_dirent) {
                        if (event_glob != NULL &&
-                           !strglobmatch(evt_dirent.d_name, event_glob))
+                           !strglobmatch(evt_dirent->d_name, event_glob))
                                continue;
 
                        if (!evt_num_known) {
@@ -1852,7 +1852,7 @@ restart:
                        }
 
                        snprintf(evt_path, MAXPATHLEN, "%s:%s",
-                                sys_dirent.d_name, evt_dirent.d_name);
+                                sys_dirent->d_name, evt_dirent->d_name);
 
                        evt_list[evt_i] = strdup(evt_path);
                        if (evt_list[evt_i] == NULL)
@@ -1905,7 +1905,7 @@ out_close_sys_dir:
 int is_valid_tracepoint(const char *event_string)
 {
        DIR *sys_dir, *evt_dir;
-       struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
+       struct dirent *sys_dirent, *evt_dirent;
        char evt_path[MAXPATHLEN];
        char dir_path[MAXPATHLEN];
 
@@ -1913,17 +1913,17 @@ int is_valid_tracepoint(const char *event_string)
        if (!sys_dir)
                return 0;
 
-       for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+       for_each_subsystem(sys_dir, sys_dirent) {
 
                snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-                        sys_dirent.d_name);
+                        sys_dirent->d_name);
                evt_dir = opendir(dir_path);
                if (!evt_dir)
                        continue;
 
-               for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+               for_each_event(sys_dirent, evt_dir, evt_dirent) {
                        snprintf(evt_path, MAXPATHLEN, "%s:%s",
-                                sys_dirent.d_name, evt_dirent.d_name);
+                                sys_dirent->d_name, evt_dirent->d_name);
                        if (!strcmp(evt_path, event_string)) {
                                closedir(evt_dir);
                                closedir(sys_dir);
index adef23b1352e836fec9f0f9a5290c578bb25cc43..bf34468a99cbca4f0891827578ed7df831d0e9ef 100644 (file)
@@ -602,14 +602,13 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
 
 static __u64 pmu_format_max_value(const unsigned long *format)
 {
-       int w;
+       __u64 w = 0;
+       int fbit;
 
-       w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
-       if (!w)
-               return 0;
-       if (w < 64)
-               return (1ULL << w) - 1;
-       return -1;
+       for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
+               w |= (1ULL << fbit);
+
+       return w;
 }
 
 /*
index 8319fbb0863618f9d5bff52f17559dc2ab23e2b7..c82c625395ab955185fe609a3deb7f8a2a10351b 100644 (file)
@@ -265,6 +265,65 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
        return true;
 }
 
+/*
+ * NOTE:
+ * The '.gnu.linkonce.this_module' section of a kernel module ELF maps
+ * directly to 'struct module' from linux/module.h. This section contains
+ * the actual module name that the kernel will use after loading it.
+ * However, we cannot use 'struct module' here since linux/module.h is not
+ * exposed to user-space. The offset of 'name' has stayed the same for a
+ * long time, so it is hardcoded here.
+ */
+#ifdef __LP64__
+#define MOD_NAME_OFFSET 24
+#else
+#define MOD_NAME_OFFSET 12
+#endif
+
+/*
+ * @module can be a module name or a module file path. In case of a path,
+ * inspect the ELF file to find out the actual module name.
+ * The caller has to free the returned string after using it.
+ */
+static char *find_module_name(const char *module)
+{
+       int fd;
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       GElf_Shdr shdr;
+       Elf_Data *data;
+       Elf_Scn *sec;
+       char *mod_name = NULL;
+
+       fd = open(module, O_RDONLY);
+       if (fd < 0)
+               return NULL;
+
+       elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+       if (elf == NULL)
+               goto elf_err;
+
+       if (gelf_getehdr(elf, &ehdr) == NULL)
+               goto ret_err;
+
+       sec = elf_section_by_name(elf, &ehdr, &shdr,
+                       ".gnu.linkonce.this_module", NULL);
+       if (!sec)
+               goto ret_err;
+
+       data = elf_getdata(sec, NULL);
+       if (!data || !data->d_buf)
+               goto ret_err;
+
+       mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+
+ret_err:
+       elf_end(elf);
+elf_err:
+       close(fd);
+       return mod_name;
+}
+
 #ifdef HAVE_DWARF_SUPPORT
 
 static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -486,8 +545,10 @@ static int get_text_start_address(const char *exec, unsigned long *address)
                return -errno;
 
        elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
-       if (elf == NULL)
-               return -EINVAL;
+       if (elf == NULL) {
+               ret = -EINVAL;
+               goto out_close;
+       }
 
        if (gelf_getehdr(elf, &ehdr) == NULL)
                goto out;
@@ -499,6 +560,9 @@ static int get_text_start_address(const char *exec, unsigned long *address)
        ret = 0;
 out:
        elf_end(elf);
+out_close:
+       close(fd);
+
        return ret;
 }
 
@@ -583,32 +647,23 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
                                            int ntevs, const char *module)
 {
        int i, ret = 0;
-       char *tmp;
+       char *mod_name = NULL;
 
        if (!module)
                return 0;
 
-       tmp = strrchr(module, '/');
-       if (tmp) {
-               /* This is a module path -- get the module name */
-               module = strdup(tmp + 1);
-               if (!module)
-                       return -ENOMEM;
-               tmp = strchr(module, '.');
-               if (tmp)
-                       *tmp = '\0';
-               tmp = (char *)module;   /* For free() */
-       }
+       mod_name = find_module_name(module);
 
        for (i = 0; i < ntevs; i++) {
-               tevs[i].point.module = strdup(module);
+               tevs[i].point.module =
+                       strdup(mod_name ? mod_name : module);
                if (!tevs[i].point.module) {
                        ret = -ENOMEM;
                        break;
                }
        }
 
-       free(tmp);
+       free(mod_name);
        return ret;
 }
 
@@ -1618,69 +1673,51 @@ out:
 }
 
 /* Compose only probe arg */
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
 {
        struct perf_probe_arg_field *field = pa->field;
-       int ret;
-       char *tmp = buf;
+       struct strbuf buf;
+       char *ret;
 
+       strbuf_init(&buf, 64);
        if (pa->name && pa->var)
-               ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var);
+               strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
        else
-               ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var);
-       if (ret <= 0)
-               goto error;
-       tmp += ret;
-       len -= ret;
+               strbuf_addstr(&buf, pa->name ?: pa->var);
 
        while (field) {
                if (field->name[0] == '[')
-                       ret = e_snprintf(tmp, len, "%s", field->name);
+                       strbuf_addstr(&buf, field->name);
                else
-                       ret = e_snprintf(tmp, len, "%s%s",
-                                        field->ref ? "->" : ".", field->name);
-               if (ret <= 0)
-                       goto error;
-               tmp += ret;
-               len -= ret;
+                       strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+                                   field->name);
                field = field->next;
        }
 
-       if (pa->type) {
-               ret = e_snprintf(tmp, len, ":%s", pa->type);
-               if (ret <= 0)
-                       goto error;
-               tmp += ret;
-               len -= ret;
-       }
+       if (pa->type)
+               strbuf_addf(&buf, ":%s", pa->type);
+
+       ret = strbuf_detach(&buf, NULL);
 
-       return tmp - buf;
-error:
-       pr_debug("Failed to synthesize perf probe argument: %d\n", ret);
        return ret;
 }
 
 /* Compose only probe point (not argument) */
 static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 {
-       char *buf, *tmp;
-       char offs[32] = "", line[32] = "", file[32] = "";
-       int ret, len;
-
-       buf = zalloc(MAX_CMDLEN);
-       if (buf == NULL) {
-               ret = -ENOMEM;
-               goto error;
-       }
-       if (pp->offset) {
-               ret = e_snprintf(offs, 32, "+%lu", pp->offset);
-               if (ret <= 0)
-                       goto error;
-       }
-       if (pp->line) {
-               ret = e_snprintf(line, 32, ":%d", pp->line);
-               if (ret <= 0)
-                       goto error;
+       struct strbuf buf;
+       char *tmp;
+       int len;
+
+       strbuf_init(&buf, 64);
+       if (pp->function) {
+               strbuf_addstr(&buf, pp->function);
+               if (pp->offset)
+                       strbuf_addf(&buf, "+%lu", pp->offset);
+               else if (pp->line)
+                       strbuf_addf(&buf, ":%d", pp->line);
+               else if (pp->retprobe)
+                       strbuf_addstr(&buf, "%return");
        }
        if (pp->file) {
                tmp = pp->file;
@@ -1689,25 +1726,12 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
                        tmp = strchr(pp->file + len - 30, '/');
                        tmp = tmp ? tmp + 1 : pp->file + len - 30;
                }
-               ret = e_snprintf(file, 32, "@%s", tmp);
-               if (ret <= 0)
-                       goto error;
+               strbuf_addf(&buf, "@%s", tmp);
+               if (!pp->function && pp->line)
+                       strbuf_addf(&buf, ":%d", pp->line);
        }
 
-       if (pp->function)
-               ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function,
-                                offs, pp->retprobe ? "%return" : "", line,
-                                file);
-       else
-               ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line);
-       if (ret <= 0)
-               goto error;
-
-       return buf;
-error:
-       pr_debug("Failed to synthesize perf probe point: %d\n", ret);
-       free(buf);
-       return NULL;
+       return strbuf_detach(&buf, NULL);
 }
 
 #if 0
@@ -1736,45 +1760,30 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev)
 #endif
 
 static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
-                                            char **buf, size_t *buflen,
-                                            int depth)
+                                           struct strbuf *buf, int depth)
 {
-       int ret;
        if (ref->next) {
                depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
-                                                        buflen, depth + 1);
+                                                        depth + 1);
                if (depth < 0)
                        goto out;
        }
-
-       ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset);
-       if (ret < 0)
-               depth = ret;
-       else {
-               *buf += ret;
-               *buflen -= ret;
-       }
+       strbuf_addf(buf, "%+ld(", ref->offset);
 out:
        return depth;
-
 }
 
 static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
-                                      char *buf, size_t buflen)
+                                     struct strbuf *buf)
 {
        struct probe_trace_arg_ref *ref = arg->ref;
-       int ret, depth = 0;
-       char *tmp = buf;
+       int depth = 0;
 
        /* Argument name or separator */
        if (arg->name)
-               ret = e_snprintf(buf, buflen, " %s=", arg->name);
+               strbuf_addf(buf, " %s=", arg->name);
        else
-               ret = e_snprintf(buf, buflen, " ");
-       if (ret < 0)
-               return ret;
-       buf += ret;
-       buflen -= ret;
+               strbuf_addch(buf, ' ');
 
        /* Special case: @XXX */
        if (arg->value[0] == '@' && arg->ref)
@@ -1782,60 +1791,41 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
 
        /* Dereferencing arguments */
        if (ref) {
-               depth = __synthesize_probe_trace_arg_ref(ref, &buf,
-                                                         &buflen, 1);
+               depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
                if (depth < 0)
                        return depth;
        }
 
        /* Print argument value */
        if (arg->value[0] == '@' && arg->ref)
-               ret = e_snprintf(buf, buflen, "%s%+ld", arg->value,
-                                arg->ref->offset);
+               strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
        else
-               ret = e_snprintf(buf, buflen, "%s", arg->value);
-       if (ret < 0)
-               return ret;
-       buf += ret;
-       buflen -= ret;
+               strbuf_addstr(buf, arg->value);
 
        /* Closing */
-       while (depth--) {
-               ret = e_snprintf(buf, buflen, ")");
-               if (ret < 0)
-                       return ret;
-               buf += ret;
-               buflen -= ret;
-       }
+       while (depth--)
+               strbuf_addch(buf, ')');
        /* Print argument type */
-       if (arg->type) {
-               ret = e_snprintf(buf, buflen, ":%s", arg->type);
-               if (ret <= 0)
-                       return ret;
-               buf += ret;
-       }
+       if (arg->type)
+               strbuf_addf(buf, ":%s", arg->type);
 
-       return buf - tmp;
+       return 0;
 }
 
 char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 {
        struct probe_trace_point *tp = &tev->point;
-       char *buf;
-       int i, len, ret;
-
-       buf = zalloc(MAX_CMDLEN);
-       if (buf == NULL)
-               return NULL;
-
-       len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
-                        tev->group, tev->event);
-       if (len <= 0)
-               goto error;
+       struct strbuf buf;
+       char *ret = NULL;
+       int i;
 
        /* Uprobes must have tp->module */
        if (tev->uprobes && !tp->module)
-               goto error;
+               return NULL;
+
+       strbuf_init(&buf, 32);
+       strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+                   tev->group, tev->event);
        /*
         * If tp->address == 0, then this point must be an
         * absolute address uprobe.
@@ -1849,34 +1839,23 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 
        /* Use the tp->address for uprobes */
        if (tev->uprobes)
-               ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
-                                tp->module, tp->address);
+               strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
        else if (!strncmp(tp->symbol, "0x", 2))
                /* Absolute address. See try_to_find_absolute_address() */
-               ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
-                                tp->module ?: "", tp->module ? ":" : "",
-                                tp->address);
+               strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
+                           tp->module ? ":" : "", tp->address);
        else
-               ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
-                                tp->module ?: "", tp->module ? ":" : "",
-                                tp->symbol, tp->offset);
+               strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
+                           tp->module ? ":" : "", tp->symbol, tp->offset);
 
-       if (ret <= 0)
-               goto error;
-       len += ret;
-
-       for (i = 0; i < tev->nargs; i++) {
-               ret = synthesize_probe_trace_arg(&tev->args[i], buf + len,
-                                                 MAX_CMDLEN - len);
-               if (ret <= 0)
+       for (i = 0; i < tev->nargs; i++)
+               if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0)
                        goto error;
-               len += ret;
-       }
 
-       return buf;
+       ret = strbuf_detach(&buf, NULL);
 error:
-       free(buf);
-       return NULL;
+       strbuf_release(&buf);
+       return ret;
 }
 
 static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
@@ -1958,7 +1937,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp,
 static int convert_to_perf_probe_event(struct probe_trace_event *tev,
                               struct perf_probe_event *pev, bool is_kprobe)
 {
-       char buf[64] = "";
+       struct strbuf buf = STRBUF_INIT;
        int i, ret;
 
        /* Convert event/group name */
@@ -1981,9 +1960,9 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev,
                if (tev->args[i].name)
                        pev->args[i].name = strdup(tev->args[i].name);
                else {
-                       ret = synthesize_probe_trace_arg(&tev->args[i],
-                                                         buf, 64);
-                       pev->args[i].name = strdup(buf);
+                       strbuf_init(&buf, 32);
+                       ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+                       pev->args[i].name = strbuf_detach(&buf, NULL);
                }
                if (pev->args[i].name == NULL && ret >= 0)
                        ret = -ENOMEM;
@@ -2161,37 +2140,37 @@ static int perf_probe_event__sprintf(const char *group, const char *event,
                                     const char *module,
                                     struct strbuf *result)
 {
-       int i, ret;
-       char buf[128];
-       char *place;
+       int i;
+       char *buf;
 
-       /* Synthesize only event probe point */
-       place = synthesize_perf_probe_point(&pev->point);
-       if (!place)
-               return -EINVAL;
+       if (asprintf(&buf, "%s:%s", group, event) < 0)
+               return -errno;
+       strbuf_addf(result, "  %-20s (on ", buf);
+       free(buf);
 
-       ret = e_snprintf(buf, 128, "%s:%s", group, event);
-       if (ret < 0)
-               goto out;
+       /* Synthesize only event probe point */
+       buf = synthesize_perf_probe_point(&pev->point);
+       if (!buf)
+               return -ENOMEM;
+       strbuf_addstr(result, buf);
+       free(buf);
 
-       strbuf_addf(result, "  %-20s (on %s", buf, place);
        if (module)
                strbuf_addf(result, " in %s", module);
 
        if (pev->nargs > 0) {
                strbuf_add(result, " with", 5);
                for (i = 0; i < pev->nargs; i++) {
-                       ret = synthesize_perf_probe_arg(&pev->args[i],
-                                                       buf, 128);
-                       if (ret < 0)
-                               goto out;
+                       buf = synthesize_perf_probe_arg(&pev->args[i]);
+                       if (!buf)
+                               return -ENOMEM;
                        strbuf_addf(result, " %s", buf);
+                       free(buf);
                }
        }
        strbuf_addch(result, ')');
-out:
-       free(place);
-       return ret;
+
+       return 0;
 }
 
 /* Show an event */
@@ -2498,7 +2477,8 @@ static int find_probe_functions(struct map *map, char *name,
 
 void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
                                struct probe_trace_event *tev __maybe_unused,
-                               struct map *map __maybe_unused) { }
+                               struct map *map __maybe_unused,
+                               struct symbol *sym __maybe_unused) { }
 
 /*
  * Find probe function addresses from map.
@@ -2516,6 +2496,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
        struct probe_trace_point *tp;
        int num_matched_functions;
        int ret, i, j, skipped = 0;
+       char *mod_name;
 
        map = get_target_map(pev->target, pev->uprobes);
        if (!map) {
@@ -2600,9 +2581,19 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
                tp->realname = strdup_or_goto(sym->name, nomem_out);
 
                tp->retprobe = pp->retprobe;
-               if (pev->target)
-                       tev->point.module = strdup_or_goto(pev->target,
-                                                          nomem_out);
+               if (pev->target) {
+                       if (pev->uprobes) {
+                               tev->point.module = strdup_or_goto(pev->target,
+                                                                  nomem_out);
+                       } else {
+                               mod_name = find_module_name(pev->target);
+                               tev->point.module =
+                                       strdup(mod_name ? mod_name : pev->target);
+                               free(mod_name);
+                               if (!tev->point.module)
+                                       goto nomem_out;
+                       }
+               }
                tev->uprobes = pev->uprobes;
                tev->nargs = pev->nargs;
                if (tev->nargs) {
@@ -2624,7 +2615,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
                                        strdup_or_goto(pev->args[i].type,
                                                        nomem_out);
                }
-               arch__fix_tev_from_maps(pev, tev, map);
+               arch__fix_tev_from_maps(pev, tev, map, sym);
        }
        if (ret == skipped) {
                ret = -ENOENT;
@@ -2743,9 +2734,13 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 {
        int ret;
 
-       if (pev->uprobes && !pev->group) {
-               /* Replace group name if not given */
-               ret = convert_exec_to_group(pev->target, &pev->group);
+       if (!pev->group) {
+               /* Set group name if not given */
+               if (!pev->uprobes) {
+                       pev->group = strdup(PERFPROBE_GROUP);
+                       ret = pev->group ? 0 : -ENOMEM;
+               } else
+                       ret = convert_exec_to_group(pev->target, &pev->group);
                if (ret != 0) {
                        pr_warning("Failed to make a group name.\n");
                        return ret;
index e54e7b011577ee9748ab5c98c23d8a236798bc44..5a27eb4fad05a29e518ac7b9ecad4a25fb8f4dd6 100644 (file)
@@ -120,7 +120,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
 /* Events to command string */
 char *synthesize_perf_probe_command(struct perf_probe_event *pev);
 char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
 
 /* Check the perf_probe_event needs debuginfo */
 bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
@@ -154,7 +154,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
 bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
-                            struct probe_trace_event *tev, struct map *map);
+                            struct probe_trace_event *tev, struct map *map,
+                            struct symbol *sym);
 
 /* If there is no space to write, returns -E2BIG. */
 int e_snprintf(char *str, size_t size, const char *format, ...)
index e3b3b92e44587350b86797e93e57900afd71f94e..3fe6214970e632932cb3809dea4c36d0aa792029 100644 (file)
@@ -220,8 +220,7 @@ int probe_file__add_event(int fd, struct probe_trace_event *tev)
 
        pr_debug("Writing event: %s\n", buf);
        if (!probe_event_dry_run) {
-               ret = write(fd, buf, strlen(buf));
-               if (ret <= 0) {
+               if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
                        ret = -errno;
                        pr_warning("Failed to write event: %s\n",
                                   strerror_r(errno, sbuf, sizeof(sbuf)));
index b3bd0fba023795fce8e862cd0236914125e8d072..9f688758b000cc48f6a4f1ce435c4ac7cdf0d4c5 100644 (file)
@@ -553,7 +553,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
 {
        Dwarf_Die vr_die;
-       char buf[32], *ptr;
+       char *buf, *ptr;
        int ret = 0;
 
        /* Copy raw parameters */
@@ -563,13 +563,13 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
        if (pf->pvar->name)
                pf->tvar->name = strdup(pf->pvar->name);
        else {
-               ret = synthesize_perf_probe_arg(pf->pvar, buf, 32);
-               if (ret < 0)
-                       return ret;
+               buf = synthesize_perf_probe_arg(pf->pvar);
+               if (!buf)
+                       return -ENOMEM;
                ptr = strchr(buf, ':'); /* Change type separator to _ */
                if (ptr)
                        *ptr = '_';
-               pf->tvar->name = strdup(buf);
+               pf->tvar->name = buf;
        }
        if (pf->tvar->name == NULL)
                return -ENOMEM;
@@ -1334,8 +1334,8 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
                        if (ret2 == 0) {
                                strlist__add(vl->vars,
                                        strbuf_detach(&buf, NULL));
-                       }
-                       strbuf_release(&buf);
+                       } else
+                               strbuf_release(&buf);
                }
        }
 
index 8162ba0e2e57e7d265c283d474f8265430171713..36c6862119e32c1fabd5001b892eb9bac0bd3ed9 100644 (file)
@@ -23,3 +23,4 @@ util/strlist.c
 util/trace-event.c
 ../lib/rbtree.c
 util/string.c
+util/symbol_fprintf.c
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644 (file)
index 0000000..abc76e3
--- /dev/null
@@ -0,0 +1,149 @@
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criterion, which must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by their shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
+                                b->thread->shortname) < 0,
+       struct thread *thread;
+)
+{
+       entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+       DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+       struct rb_node *nd;
+
+       resort_rb__for_each(nd, threads) {
+               struct thread *t = threads_entry->thread;
+               printf("%s: %d\n", t->shortname, t->tid);
+       }
+
+ * Then delete it:
+
+       resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ *     struct threads_sorted_entry {}
+ *     threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...)                                  \
+struct __name##_sorted_entry {                                                 \
+       struct rb_node  rb_node;                                                \
+       __VA_ARGS__                                                             \
+};                                                                             \
+static void __name##_sorted__init_entry(struct rb_node *nd,                    \
+                                       struct __name##_sorted_entry *entry);   \
+                                                                               \
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb)      \
+{                                                                              \
+       struct __name##_sorted_entry *a, *b;                                    \
+       a = rb_entry(nda, struct __name##_sorted_entry, rb_node);               \
+       b = rb_entry(ndb, struct __name##_sorted_entry, rb_node);               \
+       return __comp;                                                          \
+}                                                                              \
+                                                                               \
+struct __name##_sorted {                                                       \
+       struct rb_root              entries;                                    \
+       struct __name##_sorted_entry nd[0];                                     \
+};                                                                             \
+                                                                               \
+static void __name##_sorted__insert(struct __name##_sorted *sorted,            \
+                                     struct rb_node *sorted_nd)                \
+{                                                                              \
+       struct rb_node **p = &sorted->entries.rb_node, *parent = NULL;          \
+       while (*p != NULL) {                                                    \
+               parent = *p;                                                    \
+               if (__name##_sorted__cmp(sorted_nd, parent))                    \
+                       p = &(*p)->rb_left;                                     \
+               else                                                            \
+                       p = &(*p)->rb_right;                                    \
+       }                                                                       \
+       rb_link_node(sorted_nd, parent, p);                                     \
+       rb_insert_color(sorted_nd, &sorted->entries);                           \
+}                                                                              \
+                                                                               \
+static void __name##_sorted__sort(struct __name##_sorted *sorted,              \
+                                   struct rb_root *entries)                    \
+{                                                                              \
+       struct rb_node *nd;                                                     \
+       unsigned int i = 0;                                                     \
+       for (nd = rb_first(entries); nd; nd = rb_next(nd)) {                    \
+               struct __name##_sorted_entry *snd = &sorted->nd[i++];           \
+               __name##_sorted__init_entry(nd, snd);                           \
+               __name##_sorted__insert(sorted, &snd->rb_node);                 \
+       }                                                                       \
+}                                                                              \
+                                                                               \
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries,   \
+                                                   int nr_entries)             \
+{                                                                              \
+       struct __name##_sorted *sorted;                                         \
+       sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries);  \
+       if (sorted) {                                                           \
+               sorted->entries = RB_ROOT;                                      \
+               __name##_sorted__sort(sorted, entries);                         \
+       }                                                                       \
+       return sorted;                                                          \
+}                                                                              \
+                                                                               \
+static void __name##_sorted__delete(struct __name##_sorted *sorted)            \
+{                                                                              \
+       free(sorted);                                                           \
+}                                                                              \
+                                                                               \
+static void __name##_sorted__init_entry(struct rb_node *nd,                    \
+                                       struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name)                                              \
+struct __name##_sorted_entry *__name##_entry;                                  \
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each(__nd, __name)                                      \
+       for (__nd = rb_first(&__name->entries);                                 \
+            __name##_entry = rb_entry(__nd, struct __name##_sorted_entry,      \
+                                      rb_node), __nd;                          \
+            __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name)                                              \
+       __name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contain both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist)                             \
+       DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries,                     \
+                                 __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)                   \
+       DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+
+#endif /* _PERF_RESORT_RB_H_ */
index 0467367dc31551122c47e0274e01ac48378c5a6e..481792c7484bd7109be072b6ea22f3fb8cf13089 100644 (file)
@@ -129,7 +129,8 @@ bool perf_can_record_cpu_wide(void)
        return true;
 }
 
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+                        struct callchain_param *callchain)
 {
        struct perf_evsel *evsel;
        bool use_sample_identifier = false;
@@ -148,7 +149,7 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
        use_comm_exec = perf_can_comm_exec();
 
        evlist__for_each(evlist, evsel) {
-               perf_evsel__config(evsel, opts);
+               perf_evsel__config(evsel, opts, callchain);
                if (evsel->tracking && use_comm_exec)
                        evsel->attr.comm_exec = 1;
        }
index b3aabc0d4eb0096fff41fb078a09d77988f103ff..62c7f6988e0e584cb16f6deab30a84b3b8672971 100644 (file)
@@ -31,6 +31,8 @@
 #include <perl.h>
 
 #include "../../perf.h"
+#include "../callchain.h"
+#include "../machine.h"
 #include "../thread.h"
 #include "../event.h"
 #include "../trace-event.h"
@@ -248,10 +250,90 @@ static void define_event_symbols(struct event_format *event,
                define_event_symbols(event, ev_name, args->next);
 }
 
+static SV *perl_process_callchain(struct perf_sample *sample,
+                                 struct perf_evsel *evsel,
+                                 struct addr_location *al)
+{
+       AV *list;
+
+       list = newAV();
+       if (!list)
+               goto exit;
+
+       if (!symbol_conf.use_callchain || !sample->callchain)
+               goto exit;
+
+       if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+                                     sample, NULL, NULL,
+                                     sysctl_perf_event_max_stack) != 0) {
+               pr_err("Failed to resolve callchain. Skipping\n");
+               goto exit;
+       }
+       callchain_cursor_commit(&callchain_cursor);
+
+
+       while (1) {
+               HV *elem;
+               struct callchain_cursor_node *node;
+               node = callchain_cursor_current(&callchain_cursor);
+               if (!node)
+                       break;
+
+               elem = newHV();
+               if (!elem)
+                       goto exit;
+
+               if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+                       hv_undef(elem);
+                       goto exit;
+               }
+
+               if (node->sym) {
+                       HV *sym = newHV();
+                       if (!sym) {
+                               hv_undef(elem);
+                               goto exit;
+                       }
+                       if (!hv_stores(sym, "start",   newSVuv(node->sym->start)) ||
+                           !hv_stores(sym, "end",     newSVuv(node->sym->end)) ||
+                           !hv_stores(sym, "binding", newSVuv(node->sym->binding)) ||
+                           !hv_stores(sym, "name",    newSVpvn(node->sym->name,
+                                                               node->sym->namelen)) ||
+                           !hv_stores(elem, "sym",    newRV_noinc((SV*)sym))) {
+                               hv_undef(sym);
+                               hv_undef(elem);
+                               goto exit;
+                       }
+               }
+
+               if (node->map) {
+                       struct map *map = node->map;
+                       const char *dsoname = "[unknown]";
+                       if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+                               if (symbol_conf.show_kernel_path && map->dso->long_name)
+                                       dsoname = map->dso->long_name;
+                               else if (map->dso->name)
+                                       dsoname = map->dso->name;
+                       }
+                       if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+                               hv_undef(elem);
+                               goto exit;
+                       }
+               }
+
+               callchain_cursor_advance(&callchain_cursor);
+               av_push(list, newRV_noinc((SV*)elem));
+       }
+
+exit:
+       return newRV_noinc((SV*)list);
+}
+
 static void perl_process_tracepoint(struct perf_sample *sample,
                                    struct perf_evsel *evsel,
-                                   struct thread *thread)
+                                   struct addr_location *al)
 {
+       struct thread *thread = al->thread;
        struct event_format *event = evsel->tp_format;
        struct format_field *field;
        static char handler[256];
@@ -295,6 +377,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
        XPUSHs(sv_2mortal(newSVuv(ns)));
        XPUSHs(sv_2mortal(newSViv(pid)));
        XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+       XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
 
        /* common fields other than pid can be accessed via xsub fns */
 
@@ -329,6 +412,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
                XPUSHs(sv_2mortal(newSVuv(nsecs)));
                XPUSHs(sv_2mortal(newSViv(pid)));
                XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+               XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
                call_pv("main::trace_unhandled", G_SCALAR);
        }
        SPAGAIN;
@@ -366,7 +450,7 @@ static void perl_process_event(union perf_event *event,
                               struct perf_evsel *evsel,
                               struct addr_location *al)
 {
-       perl_process_tracepoint(sample, evsel, al->thread);
+       perl_process_tracepoint(sample, evsel, al);
        perl_process_event_generic(event, sample, evsel);
 }
 
@@ -490,7 +574,27 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile)
        fprintf(ofp, "use Perf::Trace::Util;\n\n");
 
        fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
-       fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
+       fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+       /* Emit the Perl helper that prints one line per callchain node. */
+       fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+       my $callchain = shift;\n\
+       for my $node (@$callchain)\n\
+       {\n\
+               if(exists $node->{sym})\n\
+               {\n\
+                       printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+               }\n\
+               else\n\
+               {\n\
+                       printf( \"\\t[\\%%x]\\n\", $node->{ip});\n\
+               }\n\
+       }\n\
+}\n\n\
+");
+
 
        while ((event = trace_find_next_event(pevent, event))) {
                fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
@@ -502,7 +606,8 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile)
                fprintf(ofp, "$common_secs, ");
                fprintf(ofp, "$common_nsecs,\n");
                fprintf(ofp, "\t    $common_pid, ");
-               fprintf(ofp, "$common_comm,\n\t    ");
+               fprintf(ofp, "$common_comm, ");
+               fprintf(ofp, "$common_callchain,\n\t    ");
 
                not_first = 0;
                count = 0;
@@ -519,7 +624,7 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile)
 
                fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
                        "$common_secs, $common_nsecs,\n\t             "
-                       "$common_pid, $common_comm);\n\n");
+                       "$common_pid, $common_comm, $common_callchain);\n\n");
 
                fprintf(ofp, "\tprintf(\"");
 
@@ -581,17 +686,22 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile)
                                fprintf(ofp, "$%s", f->name);
                }
 
-               fprintf(ofp, ");\n");
+               fprintf(ofp, ");\n\n");
+
+               fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
                fprintf(ofp, "}\n\n");
        }
 
        fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
                "$common_cpu, $common_secs, $common_nsecs,\n\t    "
-               "$common_pid, $common_comm) = @_;\n\n");
+               "$common_pid, $common_comm, $common_callchain) = @_;\n\n");
 
        fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
                "$common_secs, $common_nsecs,\n\t             $common_pid, "
-               "$common_comm);\n}\n\n");
+               "$common_comm, $common_callchain);\n");
+       fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+       fprintf(ofp, "}\n\n");
 
        fprintf(ofp, "sub print_header\n{\n"
                "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
index fbd05242b4e59786ca0e081a52729248d780f5a0..091bce67844cb9c4dc7e051994ea0222f1c62722 100644 (file)
@@ -41,6 +41,7 @@
 #include "../thread-stack.h"
 #include "../trace-event.h"
 #include "../machine.h"
+#include "../call-path.h"
 #include "thread_map.h"
 #include "cpumap.h"
 #include "stat.h"
@@ -323,7 +324,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
        if (!symbol_conf.use_callchain || !sample->callchain)
                goto exit;
 
-       if (thread__resolve_callchain(al->thread, evsel,
+       if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
                                      sample, NULL, NULL,
                                      scripting_max_stack) != 0) {
                pr_err("Failed to resolve callchain. Skipping\n");
@@ -681,7 +682,7 @@ static int python_export_sample(struct db_export *dbe,
        struct tables *tables = container_of(dbe, struct tables, dbe);
        PyObject *t;
 
-       t = tuple_new(21);
+       t = tuple_new(22);
 
        tuple_set_u64(t, 0, es->db_id);
        tuple_set_u64(t, 1, es->evsel->db_id);
@@ -704,6 +705,7 @@ static int python_export_sample(struct db_export *dbe,
        tuple_set_u64(t, 18, es->sample->data_src);
        tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
        tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+       tuple_set_u64(t, 21, es->call_path_id);
 
        call_object(tables->sample_handler, t, "sample_table");
 
@@ -998,8 +1000,10 @@ static void set_table_handlers(struct tables *tables)
 {
        const char *perf_db_export_mode = "perf_db_export_mode";
        const char *perf_db_export_calls = "perf_db_export_calls";
-       PyObject *db_export_mode, *db_export_calls;
+       const char *perf_db_export_callchains = "perf_db_export_callchains";
+       PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
        bool export_calls = false;
+       bool export_callchains = false;
        int ret;
 
        memset(tables, 0, sizeof(struct tables));
@@ -1016,6 +1020,7 @@ static void set_table_handlers(struct tables *tables)
        if (!ret)
                return;
 
+       /* handle export calls */
        tables->dbe.crp = NULL;
        db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
        if (db_export_calls) {
@@ -1033,6 +1038,33 @@ static void set_table_handlers(struct tables *tables)
                        Py_FatalError("failed to create calls processor");
        }
 
+       /* handle export callchains */
+       tables->dbe.cpr = NULL;
+       db_export_callchains = PyDict_GetItemString(main_dict,
+                                                   perf_db_export_callchains);
+       if (db_export_callchains) {
+               ret = PyObject_IsTrue(db_export_callchains);
+               if (ret == -1)
+                       handler_call_die(perf_db_export_callchains);
+               export_callchains = !!ret;
+       }
+
+       if (export_callchains) {
+               /*
+                * Attempt to use the call path root from the call return
+                * processor, if the call return processor is in use. Otherwise,
+                * we allocate a new call path root. This prevents exporting
+                * duplicate call path ids when both are in use simultaneously.
+                */
+               if (tables->dbe.crp)
+                       tables->dbe.cpr = tables->dbe.crp->cpr;
+               else
+                       tables->dbe.cpr = call_path_root__new();
+
+               if (!tables->dbe.cpr)
+                       Py_FatalError("failed to create call path root");
+       }
+
        tables->db_export_mode = true;
        /*
         * Reserve per symbol space for symbol->db_id via symbol__priv()
index 4abd85c6346dd5d8193108d8c72668dfe2ef2948..2335b2824d8af979d037231d0b292a05d0f22504 100644 (file)
@@ -409,6 +409,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                tool->stat = process_stat_stub;
        if (tool->stat_round == NULL)
                tool->stat_round = process_stat_round_stub;
+       if (tool->time_conv == NULL)
+               tool->time_conv = process_event_op2_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -794,6 +796,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_STAT]                = perf_event__stat_swap,
        [PERF_RECORD_STAT_ROUND]          = perf_event__stat_round_swap,
        [PERF_RECORD_EVENT_UPDATE]        = perf_event__event_update_swap,
+       [PERF_RECORD_TIME_CONV]           = perf_event__all64_swap,
        [PERF_RECORD_HEADER_MAX]          = NULL,
 };
 
@@ -904,7 +907,7 @@ static void callchain__printf(struct perf_evsel *evsel,
        unsigned int i;
        struct ip_callchain *callchain = sample->callchain;
 
-       if (has_branch_callstack(evsel))
+       if (perf_evsel__has_branch_callstack(evsel))
                callchain__lbr_callstack_printf(sample);
 
        printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1078,7 +1081,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
        if (sample_type & PERF_SAMPLE_CALLCHAIN)
                callchain__printf(evsel, sample);
 
-       if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel))
+       if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
                branch_stack__printf(sample);
 
        if (sample_type & PERF_SAMPLE_REGS_USER)
@@ -1341,6 +1344,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
                return tool->stat(tool, event, session);
        case PERF_RECORD_STAT_ROUND:
                return tool->stat_round(tool, event, session);
+       case PERF_RECORD_TIME_CONV:
+               session->time_conv = event->time_conv;
+               return tool->time_conv(tool, event, session);
        default:
                return -EINVAL;
        }
@@ -1830,7 +1836,11 @@ out:
 out_err:
        ui_progress__finish();
        perf_session__warn_about_errors(session);
-       ordered_events__free(&session->ordered_events);
+       /*
+        * We may be switching perf.data output, so make ordered_events
+        * reusable.
+        */
+       ordered_events__reinit(&session->ordered_events);
        auxtrace__free_events(session);
        session->one_mmap = false;
        return err;
@@ -1947,105 +1957,6 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
        return NULL;
 }
 
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
-                         struct addr_location *al,
-                         unsigned int print_opts, unsigned int stack_depth)
-{
-       struct callchain_cursor_node *node;
-       int print_ip = print_opts & PRINT_IP_OPT_IP;
-       int print_sym = print_opts & PRINT_IP_OPT_SYM;
-       int print_dso = print_opts & PRINT_IP_OPT_DSO;
-       int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET;
-       int print_oneline = print_opts & PRINT_IP_OPT_ONELINE;
-       int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE;
-       char s = print_oneline ? ' ' : '\t';
-
-       if (symbol_conf.use_callchain && sample->callchain) {
-               struct addr_location node_al;
-
-               if (thread__resolve_callchain(al->thread, evsel,
-                                             sample, NULL, NULL,
-                                             stack_depth) != 0) {
-                       if (verbose)
-                               error("Failed to resolve callchain. Skipping\n");
-                       return;
-               }
-               callchain_cursor_commit(&callchain_cursor);
-
-               if (print_symoffset)
-                       node_al = *al;
-
-               while (stack_depth) {
-                       u64 addr = 0;
-
-                       node = callchain_cursor_current(&callchain_cursor);
-                       if (!node)
-                               break;
-
-                       if (node->sym && node->sym->ignore)
-                               goto next;
-
-                       if (print_ip)
-                               printf("%c%16" PRIx64, s, node->ip);
-
-                       if (node->map)
-                               addr = node->map->map_ip(node->map, node->ip);
-
-                       if (print_sym) {
-                               printf(" ");
-                               if (print_symoffset) {
-                                       node_al.addr = addr;
-                                       node_al.map  = node->map;
-                                       symbol__fprintf_symname_offs(node->sym, &node_al, stdout);
-                               } else
-                                       symbol__fprintf_symname(node->sym, stdout);
-                       }
-
-                       if (print_dso) {
-                               printf(" (");
-                               map__fprintf_dsoname(node->map, stdout);
-                               printf(")");
-                       }
-
-                       if (print_srcline)
-                               map__fprintf_srcline(node->map, addr, "\n  ",
-                                                    stdout);
-
-                       if (!print_oneline)
-                               printf("\n");
-
-                       stack_depth--;
-next:
-                       callchain_cursor_advance(&callchain_cursor);
-               }
-
-       } else {
-               if (al->sym && al->sym->ignore)
-                       return;
-
-               if (print_ip)
-                       printf("%16" PRIx64, sample->ip);
-
-               if (print_sym) {
-                       printf(" ");
-                       if (print_symoffset)
-                               symbol__fprintf_symname_offs(al->sym, al,
-                                                            stdout);
-                       else
-                               symbol__fprintf_symname(al->sym, stdout);
-               }
-
-               if (print_dso) {
-                       printf(" (");
-                       map__fprintf_dsoname(al->map, stdout);
-                       printf(")");
-               }
-
-               if (print_srcline)
-                       map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
-       }
-}
-
 int perf_session__cpu_bitmap(struct perf_session *session,
                             const char *cpu_list, unsigned long *cpu_bitmap)
 {
index 5f792e35d4c1e2f72201aceb650798ee3f6a9f26..4bd758553450c8904860d19dd642f790310bc591 100644 (file)
@@ -26,6 +26,7 @@ struct perf_session {
        struct itrace_synth_opts *itrace_synth_opts;
        struct list_head        auxtrace_index;
        struct trace_event      tevent;
+       struct time_conv_event  time_conv;
        bool                    repipe;
        bool                    one_mmap;
        void                    *one_mmap_addr;
@@ -35,13 +36,6 @@ struct perf_session {
        struct perf_tool        *tool;
 };
 
-#define PRINT_IP_OPT_IP                (1<<0)
-#define PRINT_IP_OPT_SYM               (1<<1)
-#define PRINT_IP_OPT_DSO               (1<<2)
-#define PRINT_IP_OPT_SYMOFFSET (1<<3)
-#define PRINT_IP_OPT_ONELINE   (1<<4)
-#define PRINT_IP_OPT_SRCLINE   (1<<5)
-
 struct perf_tool;
 
 struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -103,10 +97,6 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
                                            unsigned int type);
 
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
-                         struct addr_location *al,
-                         unsigned int print_opts, unsigned int stack_depth);
-
 int perf_session__cpu_bitmap(struct perf_session *session,
                             const char *cpu_list, unsigned long *cpu_bitmap);
 
index f5ba111cd9fb2a00757a36af8d706a3f4380c92e..20e69edd5006bc29abb0d8be20f0c176085b7502 100644 (file)
@@ -21,13 +21,6 @@ const char   *sort_order;
 const char     *field_order;
 regex_t                ignore_callees_regex;
 int            have_ignore_callees = 0;
-int            sort__need_collapse = 0;
-int            sort__has_parent = 0;
-int            sort__has_sym = 0;
-int            sort__has_dso = 0;
-int            sort__has_socket = 0;
-int            sort__has_thread = 0;
-int            sort__has_comm = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
 
 /*
@@ -244,7 +237,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
         * comparing symbol address alone is not enough since it's a
         * relative address within a dso.
         */
-       if (!sort__has_dso) {
+       if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
                ret = sort__dso_cmp(left, right);
                if (ret != 0)
                        return ret;
@@ -2163,7 +2156,7 @@ static int __sort_dimension__add(struct sort_dimension *sd,
                return -1;
 
        if (sd->entry->se_collapse)
-               sort__need_collapse = 1;
+               list->need_collapse = 1;
 
        sd->taken = 1;
 
@@ -2245,9 +2238,9 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                                pr_err("Invalid regex: %s\n%s", parent_pattern, err);
                                return -EINVAL;
                        }
-                       sort__has_parent = 1;
+                       list->parent = 1;
                } else if (sd->entry == &sort_sym) {
-                       sort__has_sym = 1;
+                       list->sym = 1;
                        /*
                         * perf diff displays the performance difference amongst
                         * two or more perf.data files. Those files could come
@@ -2258,13 +2251,13 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                                sd->entry->se_collapse = sort__sym_sort;
 
                } else if (sd->entry == &sort_dso) {
-                       sort__has_dso = 1;
+                       list->dso = 1;
                } else if (sd->entry == &sort_socket) {
-                       sort__has_socket = 1;
+                       list->socket = 1;
                } else if (sd->entry == &sort_thread) {
-                       sort__has_thread = 1;
+                       list->thread = 1;
                } else if (sd->entry == &sort_comm) {
-                       sort__has_comm = 1;
+                       list->comm = 1;
                }
 
                return __sort_dimension__add(sd, list, level);
@@ -2289,7 +2282,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                        return -EINVAL;
 
                if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
-                       sort__has_sym = 1;
+                       list->sym = 1;
 
                __sort_dimension__add(sd, list, level);
                return 0;
@@ -2305,7 +2298,7 @@ static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                        return -EINVAL;
 
                if (sd->entry == &sort_mem_daddr_sym)
-                       sort__has_sym = 1;
+                       list->sym = 1;
 
                __sort_dimension__add(sd, list, level);
                return 0;
@@ -2749,10 +2742,10 @@ int setup_sorting(struct perf_evlist *evlist)
 
 void reset_output_field(void)
 {
-       sort__need_collapse = 0;
-       sort__has_parent = 0;
-       sort__has_sym = 0;
-       sort__has_dso = 0;
+       perf_hpp_list.need_collapse = 0;
+       perf_hpp_list.parent = 0;
+       perf_hpp_list.sym = 0;
+       perf_hpp_list.dso = 0;
 
        field_order = NULL;
        sort_order = NULL;
index 3f4e359981192ac50b56e770aa472749666e1f98..42927f448bcbc2f5ae39b9cd416948387d898825 100644 (file)
@@ -31,13 +31,6 @@ extern const char *parent_pattern;
 extern const char default_sort_order[];
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
-extern int sort__need_collapse;
-extern int sort__has_dso;
-extern int sort__has_parent;
-extern int sort__has_sym;
-extern int sort__has_socket;
-extern int sort__has_thread;
-extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
index bc229a74c6a9aa70f2605fe04b7076c9c346f6cb..87a297dd89016e13a829d4a7397c74e0892eae30 100644 (file)
@@ -709,17 +709,10 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
        if (ss->opdshdr.sh_type != SHT_PROGBITS)
                ss->opdsec = NULL;
 
-       if (dso->kernel == DSO_TYPE_USER) {
-               GElf_Shdr shdr;
-               ss->adjust_symbols = (ehdr.e_type == ET_EXEC ||
-                               ehdr.e_type == ET_REL ||
-                               dso__is_vdso(dso) ||
-                               elf_section_by_name(elf, &ehdr, &shdr,
-                                                    ".gnu.prelink_undo",
-                                                    NULL) != NULL);
-       } else {
+       if (dso->kernel == DSO_TYPE_USER)
+               ss->adjust_symbols = true;
+       else
                ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
-       }
 
        ss->name   = strdup(name);
        if (!ss->name) {
@@ -777,7 +770,8 @@ static bool want_demangle(bool is_kernel_sym)
        return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
 }
 
-void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+/*
+ * Weak, empty default for arch__sym_update(). dso__load_sym() calls it on each
+ * newly created symbol together with the ELF symbol it was built from; being
+ * __weak, another definition can replace this no-op at link time.
+ */
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+               GElf_Sym *sym __maybe_unused) { }
 
 int dso__load_sym(struct dso *dso, struct map *map,
                  struct symsrc *syms_ss, struct symsrc *runtime_ss,
@@ -954,8 +948,6 @@ int dso__load_sym(struct dso *dso, struct map *map,
                    (sym.st_value & 1))
                        --sym.st_value;
 
-               arch__elf_sym_adjust(&sym);
-
                if (dso->kernel || kmodule) {
                        char dso_name[PATH_MAX];
 
@@ -1089,6 +1081,8 @@ new_symbol:
                if (!f)
                        goto out_elf_end;
 
+               arch__sym_update(f, &sym);
+
                if (filter && filter(curr_map, f))
                        symbol__delete(f);
                else {
index e7588dc915181729394c1d2195c78576c47d20df..415c4f6d98fd4c54fade340513c8e06d3968ba03 100644 (file)
@@ -255,40 +255,6 @@ void symbol__delete(struct symbol *sym)
        free(((void *)sym) - symbol_conf.priv_size);
 }
 
-size_t symbol__fprintf(struct symbol *sym, FILE *fp)
-{
-       return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
-                      sym->start, sym->end,
-                      sym->binding == STB_GLOBAL ? 'g' :
-                      sym->binding == STB_LOCAL  ? 'l' : 'w',
-                      sym->name);
-}
-
-size_t symbol__fprintf_symname_offs(const struct symbol *sym,
-                                   const struct addr_location *al, FILE *fp)
-{
-       unsigned long offset;
-       size_t length;
-
-       if (sym && sym->name) {
-               length = fprintf(fp, "%s", sym->name);
-               if (al) {
-                       if (al->addr < sym->end)
-                               offset = al->addr - sym->start;
-                       else
-                               offset = al->addr - al->map->start - sym->start;
-                       length += fprintf(fp, "+0x%lx", offset);
-               }
-               return length;
-       } else
-               return fprintf(fp, "[unknown]");
-}
-
-size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
-{
-       return symbol__fprintf_symname_offs(sym, NULL, fp);
-}
-
 void symbols__delete(struct rb_root *symbols)
 {
        struct symbol *pos;
@@ -364,11 +330,6 @@ static struct symbol *symbols__next(struct symbol *sym)
        return NULL;
 }
 
-struct symbol_name_rb_node {
-       struct rb_node  rb_node;
-       struct symbol   sym;
-};
-
 static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
 {
        struct rb_node **p = &symbols->rb_node;
@@ -497,21 +458,6 @@ void dso__sort_by_name(struct dso *dso, enum map_type type)
                                     &dso->symbols[type]);
 }
 
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
-                                   enum map_type type, FILE *fp)
-{
-       size_t ret = 0;
-       struct rb_node *nd;
-       struct symbol_name_rb_node *pos;
-
-       for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
-               pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
-               fprintf(fp, "%s\n", pos->sym.name);
-       }
-
-       return ret;
-}
-
 int modules__parse(const char *filename, void *arg,
                   int (*process_module)(void *arg, const char *name,
                                         u64 start))
@@ -1262,8 +1208,8 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
        return 0;
 }
 
-int dso__load_kallsyms(struct dso *dso, const char *filename,
-                      struct map *map, symbol_filter_t filter)
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+                        struct map *map, bool no_kcore, symbol_filter_t filter)
 {
        u64 delta = 0;
 
@@ -1284,12 +1230,18 @@ int dso__load_kallsyms(struct dso *dso, const char *filename,
        else
                dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
-       if (!dso__load_kcore(dso, map, filename))
+       if (!no_kcore && !dso__load_kcore(dso, map, filename))
                return dso__split_kallsyms_for_kcore(dso, map, filter);
        else
                return dso__split_kallsyms(dso, map, delta, filter);
 }
 
+/*
+ * Convenience wrapper around __dso__load_kallsyms() that passes
+ * no_kcore == false, i.e. loading from kcore is still attempted.
+ */
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+                      struct map *map, symbol_filter_t filter)
+{
+       return __dso__load_kallsyms(dso, filename, map, false, filter);
+}
+
 static int dso__load_perf_map(struct dso *dso, struct map *map,
                              symbol_filter_t filter)
 {
index c8b7544d92675a0833c87404f8f236f5446df2c2..07211c2f8456b409982532632726cb524c150ac1 100644 (file)
@@ -55,6 +55,7 @@ struct symbol {
        u16             namelen;
        u8              binding;
        bool            ignore;
+       u8              arch_sym;
        char            name[0];
 };
 
@@ -140,6 +141,11 @@ struct symbol_conf {
 
 extern struct symbol_conf symbol_conf;
 
+/*
+ * A symbol together with the rb_node that links it into a tree of symbols;
+ * dso__fprintf_symbols_by_name() recovers it from the node via rb_entry().
+ */
+struct symbol_name_rb_node {
+       struct rb_node  rb_node;
+       struct symbol   sym;
+};
+
 static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
 {
        return path__join(bf, size, symbol_conf.symfs, path);
@@ -235,6 +241,8 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
                      symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *dso, struct map *map,
                           symbol_filter_t filter);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+                        bool no_kcore, symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
                       symbol_filter_t filter);
 
@@ -262,8 +270,14 @@ int symbol__init(struct perf_env *env);
 void symbol__exit(void);
 void symbol__elf_init(void);
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+                                     const struct addr_location *al,
+                                     bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname_offs(const struct symbol *sym,
                                    const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+                                const struct addr_location *al,
+                                bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
@@ -310,7 +324,7 @@ int setup_intlist(struct intlist **list, const char *list_str,
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
-void arch__elf_sym_adjust(GElf_Sym *sym);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
 #endif
 
 #define SYMBOL_A 0
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644 (file)
index 0000000..a680bda
--- /dev/null
@@ -0,0 +1,71 @@
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "symbol.h"
+
+/*
+ * Print one line describing @sym to @fp: " <start>-<end> <b> <name>", with
+ * start/end in hex and <b> being 'g' for STB_GLOBAL, 'l' for STB_LOCAL and
+ * 'w' for any other binding.
+ *
+ * Returns whatever fprintf() returned.
+ */
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+       return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+                      sym->start, sym->end,
+                      sym->binding == STB_GLOBAL ? 'g' :
+                      sym->binding == STB_LOCAL  ? 'l' : 'w',
+                      sym->name);
+}
+
+/*
+ * Print the name of @sym to @fp, followed by "+0x<offset>" when @al is given.
+ *
+ * Without a usable symbol the output is "[<addr>]" (hex, taken from @al) when
+ * both @al and @unknown_as_addr are set, and "[unknown]" otherwise.
+ *
+ * Returns the accumulated fprintf() results.
+ */
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+                                     const struct addr_location *al,
+                                     bool unknown_as_addr, FILE *fp)
+{
+       unsigned long offset;
+       size_t printed;
+
+       /* No symbol: fall back to the raw address or a fixed placeholder. */
+       if (!(sym && sym->name)) {
+               if (al && unknown_as_addr)
+                       return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+               return fprintf(fp, "[unknown]");
+       }
+
+       printed = fprintf(fp, "%s", sym->name);
+       if (!al)
+               return printed;
+
+       /*
+        * An address below the symbol end is used as is; otherwise the map
+        * start is subtracted first.
+        */
+       if (al->addr < sym->end)
+               offset = al->addr - sym->start;
+       else
+               offset = al->addr - al->map->start - sym->start;
+
+       printed += fprintf(fp, "+0x%lx", offset);
+       return printed;
+}
+
+/*
+ * Print symbol name plus offset; an unresolved symbol is always printed as
+ * "[unknown]" because unknown_as_addr is passed as false.
+ */
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+                                   const struct addr_location *al,
+                                   FILE *fp)
+{
+       return __symbol__fprintf_symname_offs(sym, al, false, fp);
+}
+
+/*
+ * Forwards all arguments unchanged to __symbol__fprintf_symname_offs().
+ * NOTE(review): because @al is forwarded, a resolved symbol is printed with
+ * its "+0x<offset>" suffix whenever @al is non-NULL - confirm that is intended
+ * for a helper named without "_offs".
+ */
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+                                const struct addr_location *al,
+                                bool unknown_as_addr, FILE *fp)
+{
+       return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp);
+}
+
+/*
+ * Print only the symbol name (no address location, hence no offset), or
+ * "[unknown]" when there is no symbol.
+ */
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+       return __symbol__fprintf_symname_offs(sym, NULL, false, fp);
+}
+
+/*
+ * Print the name of every symbol in dso->symbol_names[type] to @fp, one per
+ * line, walking the rb-tree from rb_first() with rb_next().
+ *
+ * Returns the sum of the fprintf() results, so callers that add up the
+ * return values of the *__fprintf() helpers get the real output size rather
+ * than a constant 0.
+ */
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+                                   enum map_type type, FILE *fp)
+{
+       size_t ret = 0;
+       struct rb_node *nd;
+       struct symbol_name_rb_node *pos;
+
+       for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+               pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+               ret += fprintf(fp, "%s\n", pos->sym.name);
+       }
+
+       return ret;
+}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644 (file)
index 0000000..bbb4c19
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+
+#ifdef HAVE_SYSCALL_TABLE
+#include <linux/compiler.h>
+#include <string.h>
+#include "util.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#endif
+
+/* One named syscall: its number (index into the native table) and its name. */
+struct syscall {
+       int id;
+       const char *name;
+};
+
+/*
+ * bsearch() comparator used by syscalltbl__id(): @vkey is the syscall name
+ * being looked up (a C string), @ventry a struct syscall table element.
+ */
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+       const char *key = vkey;
+       const struct syscall *entry = ventry;
+
+       return strcmp(key, entry->name);
+}
+
+/* qsort() comparator: orders two struct syscall elements by name (strcmp). */
+static int syscallcmp(const void *va, const void *vb)
+{
+       const struct syscall *a = va, *b = vb;
+
+       return strcmp(a->name, b->name);
+}
+
+/*
+ * Build the name-sorted lookup array in @tbl from the native syscall table.
+ *
+ * Ids whose table slot is NULL are skipped. The resulting array is sorted
+ * with syscallcmp() so that syscalltbl__id() can bsearch() it.
+ *
+ * Returns 0 on success, -1 if the array could not be allocated.
+ */
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+       struct syscall *table;
+       int id, count = 0, slot = 0;
+
+       /* First pass: count the ids that actually have a name. */
+       for (id = 0; id <= syscalltbl_native_max_id; ++id) {
+               if (syscalltbl_native[id])
+                       count++;
+       }
+
+       table = malloc(sizeof(struct syscall) * count);
+       tbl->syscalls.entries = table;
+       if (table == NULL)
+               return -1;
+
+       /* Second pass: record an (id, name) pair for every named slot. */
+       for (id = 0; id <= syscalltbl_native_max_id; ++id) {
+               if (!syscalltbl_native[id])
+                       continue;
+
+               table[slot].name = syscalltbl_native[id];
+               table[slot].id = id;
+               slot++;
+       }
+
+       qsort(table, count, sizeof(struct syscall), syscallcmp);
+       tbl->syscalls.nr_entries = count;
+       return 0;
+}
+
+/*
+ * Allocate a syscall table and fill it from the native table.
+ * Returns NULL if either the allocation or the initialization fails.
+ */
+struct syscalltbl *syscalltbl__new(void)
+{
+       struct syscalltbl *tbl = malloc(sizeof(*tbl));
+
+       if (!tbl)
+               return NULL;
+
+       if (syscalltbl__init_native(tbl)) {
+               free(tbl);
+               return NULL;
+       }
+
+       return tbl;
+}
+
+/*
+ * Release the sorted entries array and the table itself.
+ * A NULL @tbl is accepted and ignored, matching the libaudit-based variant
+ * of this function, which only calls free().
+ */
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+       if (tbl == NULL)
+               return;
+
+       zfree(&tbl->syscalls.entries);
+       free(tbl);
+}
+
+/*
+ * Map a syscall number to its name using the native table.
+ *
+ * Returns NULL for an @id outside [0, syscalltbl_native_max_id]. A negative
+ * id must be rejected here, since it would otherwise index before the start
+ * of the table. Slots without a name also yield NULL, as the table stores
+ * NULL for them.
+ */
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+       if (id < 0 || id > syscalltbl_native_max_id)
+               return NULL;
+
+       return syscalltbl_native[id];
+}
+
+/*
+ * Look up a syscall number by name with a binary search over the name-sorted
+ * entries array built by syscalltbl__init_native().
+ *
+ * Returns the syscall id, or -1 when @name is not in the table.
+ */
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+       struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+                                    tbl->syscalls.nr_entries, sizeof(*sc),
+                                    syscallcmpname);
+
+       return sc ? sc->id : -1;
+}
+
+#else /* HAVE_SYSCALL_TABLE */
+
+#include <libaudit.h>
+
+/*
+ * libaudit-backed variant: allocate a table and record the machine type
+ * reported by audit_detect_machine(). Returns NULL if malloc() fails.
+ */
+struct syscalltbl *syscalltbl__new(void)
+{
+       struct syscalltbl *tbl = malloc(sizeof(*tbl));
+       if (tbl)
+               tbl->audit_machine = audit_detect_machine();
+       return tbl;
+}
+
+/* libaudit-backed variant: the table owns no other memory, so just free it. */
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+       free(tbl);
+}
+
+/* libaudit-backed variant: resolve @id for the machine stored in @tbl. */
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+       return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+/* libaudit-backed variant: resolve @name for the machine stored in @tbl. */
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+       return audit_name_to_syscall(name, tbl->audit_machine);
+}
+#endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644 (file)
index 0000000..e295151
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+/*
+ * Syscall name <-> number mapper. Only one union member is in use, depending
+ * on how syscalltbl.c was built:
+ *  - audit_machine: machine type from audit_detect_machine() (libaudit build)
+ *  - syscalls: name-sorted array of entries and its length (HAVE_SYSCALL_TABLE
+ *    build); the element type is private to syscalltbl.c, hence the void *.
+ */
+struct syscalltbl {
+       union {
+               int audit_machine;
+               struct {
+                       int nr_entries;
+                       void *entries;
+               } syscalls;
+       };
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+#endif /* __PERF_SYSCALLTBL_H */
index 679688e70ae7e72e73d14cc7659cb965cc4d7016..825086aa9a08685303c08cddb36823620fa65c9e 100644 (file)
 #include "debug.h"
 #include "symbol.h"
 #include "comm.h"
+#include "call-path.h"
 #include "thread-stack.h"
 
-#define CALL_PATH_BLOCK_SHIFT 8
-#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
-#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
-
-struct call_path_block {
-       struct call_path cp[CALL_PATH_BLOCK_SIZE];
-       struct list_head node;
-};
-
-/**
- * struct call_path_root - root of all call paths.
- * @call_path: root call path
- * @blocks: list of blocks to store call paths
- * @next: next free space
- * @sz: number of spaces
- */
-struct call_path_root {
-       struct call_path call_path;
-       struct list_head blocks;
-       size_t next;
-       size_t sz;
-};
-
-/**
- * struct call_return_processor - provides a call-back to consume call-return
- *                                information.
- * @cpr: call path root
- * @process: call-back that accepts call/return information
- * @data: anonymous data for call-back
- */
-struct call_return_processor {
-       struct call_path_root *cpr;
-       int (*process)(struct call_return *cr, void *data);
-       void *data;
-};
-
 #define STACK_GROWTH 2048
 
 /**
@@ -335,108 +300,6 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
 }
 
-static void call_path__init(struct call_path *cp, struct call_path *parent,
-                           struct symbol *sym, u64 ip, bool in_kernel)
-{
-       cp->parent = parent;
-       cp->sym = sym;
-       cp->ip = sym ? 0 : ip;
-       cp->db_id = 0;
-       cp->in_kernel = in_kernel;
-       RB_CLEAR_NODE(&cp->rb_node);
-       cp->children = RB_ROOT;
-}
-
-static struct call_path_root *call_path_root__new(void)
-{
-       struct call_path_root *cpr;
-
-       cpr = zalloc(sizeof(struct call_path_root));
-       if (!cpr)
-               return NULL;
-       call_path__init(&cpr->call_path, NULL, NULL, 0, false);
-       INIT_LIST_HEAD(&cpr->blocks);
-       return cpr;
-}
-
-static void call_path_root__free(struct call_path_root *cpr)
-{
-       struct call_path_block *pos, *n;
-
-       list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
-               list_del(&pos->node);
-               free(pos);
-       }
-       free(cpr);
-}
-
-static struct call_path *call_path__new(struct call_path_root *cpr,
-                                       struct call_path *parent,
-                                       struct symbol *sym, u64 ip,
-                                       bool in_kernel)
-{
-       struct call_path_block *cpb;
-       struct call_path *cp;
-       size_t n;
-
-       if (cpr->next < cpr->sz) {
-               cpb = list_last_entry(&cpr->blocks, struct call_path_block,
-                                     node);
-       } else {
-               cpb = zalloc(sizeof(struct call_path_block));
-               if (!cpb)
-                       return NULL;
-               list_add_tail(&cpb->node, &cpr->blocks);
-               cpr->sz += CALL_PATH_BLOCK_SIZE;
-       }
-
-       n = cpr->next++ & CALL_PATH_BLOCK_MASK;
-       cp = &cpb->cp[n];
-
-       call_path__init(cp, parent, sym, ip, in_kernel);
-
-       return cp;
-}
-
-static struct call_path *call_path__findnew(struct call_path_root *cpr,
-                                           struct call_path *parent,
-                                           struct symbol *sym, u64 ip, u64 ks)
-{
-       struct rb_node **p;
-       struct rb_node *node_parent = NULL;
-       struct call_path *cp;
-       bool in_kernel = ip >= ks;
-
-       if (sym)
-               ip = 0;
-
-       if (!parent)
-               return call_path__new(cpr, parent, sym, ip, in_kernel);
-
-       p = &parent->children.rb_node;
-       while (*p != NULL) {
-               node_parent = *p;
-               cp = rb_entry(node_parent, struct call_path, rb_node);
-
-               if (cp->sym == sym && cp->ip == ip)
-                       return cp;
-
-               if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
-       }
-
-       cp = call_path__new(cpr, parent, sym, ip, in_kernel);
-       if (!cp)
-               return NULL;
-
-       rb_link_node(&cp->rb_node, node_parent, p);
-       rb_insert_color(&cp->rb_node, &parent->children);
-
-       return cp;
-}
-
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
                           void *data)
index e1528f1374c3e5131efe8c2293ef9a6736ea3ed4..ad44c7944b8e72bbdbd931983cf972d3a598799e 100644 (file)
 #include <sys/types.h>
 
 #include <linux/types.h>
-#include <linux/rbtree.h>
 
 struct thread;
 struct comm;
 struct ip_callchain;
 struct symbol;
 struct dso;
-struct call_return_processor;
 struct comm;
 struct perf_sample;
 struct addr_location;
+struct call_path;
 
 /*
  * Call/Return flags.
@@ -69,26 +68,16 @@ struct call_return {
 };
 
 /**
- * struct call_path - node in list of calls leading to a function call.
- * @parent: call path to the parent function call
- * @sym: symbol of function called
- * @ip: only if sym is null, the ip of the function
- * @db_id: id used for db-export
- * @in_kernel: whether function is a in the kernel
- * @rb_node: node in parent's tree of called functions
- * @children: tree of call paths of functions called
- *
- * In combination with the call_return structure, the call_path structure
- * defines a context-sensitve call-graph.
+ * struct call_return_processor - provides a call-back to consume call-return
+ *                                information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
  */
-struct call_path {
-       struct call_path *parent;
-       struct symbol *sym;
-       u64 ip;
-       u64 db_id;
-       bool in_kernel;
-       struct rb_node rb_node;
-       struct rb_root children;
+struct call_return_processor {
+       struct call_path_root *cpr;
+       int (*process)(struct call_return *cr, void *data);
+       void *data;
 };
 
 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
index dfd00c6dad6e68ad3dfde68cbb9e0b16a95f1fee..45fcb715a36b3f6a975600d41eb877b387b78417 100644 (file)
@@ -10,6 +10,8 @@
 #include "comm.h"
 #include "unwind.h"
 
+#include <api/fs/fs.h>
+
 int thread__init_map_groups(struct thread *thread, struct machine *machine)
 {
        struct thread *leader;
@@ -153,6 +155,23 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
        return 0;
 }
 
+/*
+ * Set the comm of @thread from the procfs file "<pid>/task/<tid>/comm".
+ *
+ * The last byte read is replaced by '\0' before the string is handed to
+ * thread__set_comm() (presumably the trailing newline of the comm file).
+ *
+ * Returns the result of thread__set_comm(), or -1 if the path does not fit
+ * in the local buffer, the file cannot be read, or it is empty.
+ */
+int thread__set_comm_from_proc(struct thread *thread)
+{
+       char path[64];
+       char *comm = NULL;
+       size_t sz;
+       int err = -1;
+
+       if (snprintf(path, sizeof(path), "%d/task/%d/comm",
+                    thread->pid_, thread->tid) >= (int)sizeof(path))
+               return err;
+
+       if (procfs__read_str(path, &comm, &sz) != 0)
+               return err;
+
+       /* An empty read would make comm[sz - 1] write before the buffer. */
+       if (sz > 0) {
+               comm[sz - 1] = '\0';
+               err = thread__set_comm(thread, comm, 0);
+       }
+
+       /*
+        * The buffer from procfs__read_str() belongs to us and must not be
+        * leaked; this relies on thread__set_comm() keeping its own copy of
+        * the string rather than the pointer passed in.
+        */
+       free(comm);
+
+       return err;
+}
+
 const char *thread__comm_str(const struct thread *thread)
 {
        const struct comm *comm = thread__comm(thread);
@@ -233,7 +252,7 @@ void thread__find_cpumode_addr_location(struct thread *thread,
                                        struct addr_location *al)
 {
        size_t i;
-       const u8 const cpumodes[] = {
+       const u8 cpumodes[] = {
                PERF_RECORD_MISC_USER,
                PERF_RECORD_MISC_KERNEL,
                PERF_RECORD_MISC_GUEST_USER,
index a0ac0317affb5ffc46f69dc00c4c258d0c40c684..45fba13c800bd36150248d4ade2b9f0b4b053329 100644 (file)
@@ -9,6 +9,9 @@
 #include "symbol.h"
 #include <strlist.h>
 #include <intlist.h>
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#include <libunwind.h>
+#endif
 
 struct thread_stack;
 
@@ -32,6 +35,9 @@ struct thread {
 
        void                    *priv;
        struct thread_stack     *ts;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+       unw_addr_space_t        addr_space;
+#endif
 };
 
 struct machine;
@@ -65,6 +71,8 @@ static inline int thread__set_comm(struct thread *thread, const char *comm,
        return __thread__set_comm(thread, comm, timestamp, false);
 }
 
+int thread__set_comm_from_proc(struct thread *thread);
+
 int thread__comm_len(struct thread *thread);
 struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
index 08afc69099538f66172968dc3827fd9b7b40d5c2..5654fe15e036795896d3548c568bd8fa3657da06 100644 (file)
@@ -94,7 +94,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
        DIR *proc;
        int max_threads = 32, items, i;
        char path[256];
-       struct dirent dirent, *next, **namelist = NULL;
+       struct dirent *dirent, **namelist = NULL;
        struct thread_map *threads = thread_map__alloc(max_threads);
 
        if (threads == NULL)
@@ -107,16 +107,16 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
        threads->nr = 0;
        atomic_set(&threads->refcnt, 1);
 
-       while (!readdir_r(proc, &dirent, &next) && next) {
+       while ((dirent = readdir(proc)) != NULL) {
                char *end;
                bool grow = false;
                struct stat st;
-               pid_t pid = strtol(dirent.d_name, &end, 10);
+               pid_t pid = strtol(dirent->d_name, &end, 10);
 
                if (*end) /* only interested in proper numerical dirents */
                        continue;
 
-               snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
+               snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
 
                if (stat(path, &st) != 0)
                        continue;
@@ -260,7 +260,7 @@ struct thread_map *thread_map__new_dummy(void)
        return threads;
 }
 
-static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 {
        struct thread_map *threads = NULL, *nt;
        int ntasks = 0;
@@ -436,3 +436,15 @@ struct thread_map *thread_map__new_event(struct thread_map_event *event)
 
        return threads;
 }
+
+/*
+ * Tell whether @pid appears in @threads.
+ * Linear scan over the threads->nr entries of threads->map[].
+ */
+bool thread_map__has(struct thread_map *threads, pid_t pid)
+{
+       int idx = 0;
+
+       while (idx < threads->nr) {
+               if (threads->map[idx].pid == pid)
+                       return true;
+               ++idx;
+       }
+
+       return false;
+}
index 85e4c7c4fbde1fd5455ed0fa58375aac3934b445..bd3b971588da57ce14951df6d0a0870a6def1355 100644 (file)
@@ -31,6 +31,8 @@ void thread_map__put(struct thread_map *map);
 struct thread_map *thread_map__new_str(const char *pid,
                const char *tid, uid_t uid);
 
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
 size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
 
 static inline int thread_map__nr(struct thread_map *threads)
@@ -55,4 +57,5 @@ static inline char *thread_map__comm(struct thread_map *map, int thread)
 }
 
 void thread_map__read_comms(struct thread_map *threads);
+bool thread_map__has(struct thread_map *threads, pid_t pid);
 #endif /* __PERF_THREAD_MAP_H */
index 55de4cffcd4e9ae2ee2063dc5efc05d0d72fc7db..ac2590a3de2d1586fa60a1893d9ec4233967b5e0 100644 (file)
@@ -57,6 +57,7 @@ struct perf_tool {
                        id_index,
                        auxtrace_info,
                        auxtrace_error,
+                       time_conv,
                        thread_map,
                        cpu_map,
                        stat_config,
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644 (file)
index 0000000..e97d701
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "util/debug.h"
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transitions:
+ *
+ *
+ *  OFF--(on)--> READY --(hit)--> HIT
+ *                 ^               |
+ *                 |            (ready)
+ *                 |               |
+ *                  \_____________/
+ *
+ * trigger_is_hit() and trigger_is_ready() are the two key functions for
+ * querying the state of a trigger: is_hit means the event has already
+ * happened; is_ready means the trigger is waiting for the event.
+ */
+
+/*
+ * A named trigger and its current state. The "unavailable" states (ERROR,
+ * OFF) are negative and the "available" ones (READY, HIT) are >= 0, which is
+ * what trigger_is_available() tests. The state is declared volatile.
+ */
+struct trigger {
+       volatile enum {
+               TRIGGER_ERROR           = -2,
+               TRIGGER_OFF             = -1,
+               TRIGGER_READY           = 0,
+               TRIGGER_HIT             = 1,
+       } state;
+       const char *name;
+};
+
+/*
+ * Warn once if trigger @t is not in the expected state @exp, reporting the
+ * trigger name, its actual state and the calling function.
+ * Both arguments are parenthesized so that any pointer expression (for
+ * example &trig) and any state expression expand with the intended precedence.
+ */
+#define TRIGGER_WARN_ONCE(t, exp) \
+       WARN_ONCE((t)->state != (exp), "trigger '%s' state transist error: %d in %s()\n", \
+                 (t)->name, (t)->state, __func__)
+
+/* True when the state is non-negative, i.e. TRIGGER_READY or TRIGGER_HIT. */
+static inline bool trigger_is_available(struct trigger *t)
+{
+       return t->state >= 0;
+}
+
+/* True when the state is TRIGGER_ERROR (or anything below it). */
+static inline bool trigger_is_error(struct trigger *t)
+{
+       return t->state <= TRIGGER_ERROR;
+}
+
+/*
+ * Switch the trigger on: the state becomes TRIGGER_READY unconditionally,
+ * with a one-time warning if it was not TRIGGER_OFF beforehand.
+ */
+static inline void trigger_on(struct trigger *t)
+{
+       TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+       t->state = TRIGGER_READY;
+}
+
+/* Re-arm an available trigger (-> TRIGGER_READY); no-op when OFF or ERROR. */
+static inline void trigger_ready(struct trigger *t)
+{
+       if (!trigger_is_available(t))
+               return;
+       t->state = TRIGGER_READY;
+}
+
+/*
+ * Record that the event happened (-> TRIGGER_HIT); no-op when OFF or ERROR.
+ * Warns once if the trigger was not in TRIGGER_READY when hit.
+ */
+static inline void trigger_hit(struct trigger *t)
+{
+       if (!trigger_is_available(t))
+               return;
+       TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+       t->state = TRIGGER_HIT;
+}
+
+/* Switch an available trigger off (-> TRIGGER_OFF); no-op when OFF or ERROR. */
+static inline void trigger_off(struct trigger *t)
+{
+       if (!trigger_is_available(t))
+               return;
+       t->state = TRIGGER_OFF;
+}
+
+/*
+ * Put the trigger into TRIGGER_ERROR from any state. Afterwards
+ * trigger_ready(), trigger_hit() and trigger_off() do nothing, because the
+ * trigger is no longer "available".
+ */
+static inline void trigger_error(struct trigger *t)
+{
+       t->state = TRIGGER_ERROR;
+}
+
+/* True while the trigger is armed and waiting for its event. */
+static inline bool trigger_is_ready(struct trigger *t)
+{
+       return t->state == TRIGGER_READY;
+}
+
+/* True once trigger_hit() has moved the trigger to TRIGGER_HIT. */
+static inline bool trigger_is_hit(struct trigger *t)
+{
+       return t->state == TRIGGER_HIT;
+}
+
+/* Define trigger @n, initially TRIGGER_OFF and named after the variable. */
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
index a8b78f1b3243891c40560c6501c93ba2d377e37c..d5b11e2b85e050427ad80958fb747e3b7c4c5e61 100644 (file)
@@ -3,10 +3,29 @@
 
 #include <linux/types.h>
 
-#include "../arch/x86/util/tsc.h"
+#include "event.h"
+
+struct perf_tsc_conversion {
+       u16 time_shift; /* NOTE(review): presumably the same-named perf_event_mmap_page field - confirm */
+       u32 time_mult;  /* NOTE(review): presumably the same-named perf_event_mmap_page field - confirm */
+       u64 time_zero;  /* NOTE(review): presumably the same-named perf_event_mmap_page field - confirm */
+};
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+                            struct perf_tsc_conversion *tc);
 
 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
 u64 rdtsc(void);
 
+struct perf_event_mmap_page;
+struct perf_tool;
+struct machine;
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+                               struct perf_tool *tool,
+                               perf_event__handler_t process,
+                               struct machine *machine);
+
 #endif
index ee7e372297e59adb7eb3c7a1ffec8acb6186f704..63687d3a344e7f39b94f465547e929b2c2779b40 100644 (file)
@@ -32,6 +32,7 @@
 #include "symbol.h"
 #include "util.h"
 #include "debug.h"
+#include "asm/bug.h"
 
 extern int
 UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
@@ -580,43 +581,33 @@ static unw_accessors_t accessors = {
 
 int unwind__prepare_access(struct thread *thread)
 {
-       unw_addr_space_t addr_space;
-
        if (callchain_param.record_mode != CALLCHAIN_DWARF)
                return 0;
 
-       addr_space = unw_create_addr_space(&accessors, 0);
-       if (!addr_space) {
+       thread->addr_space = unw_create_addr_space(&accessors, 0);
+       if (!thread->addr_space) {
                pr_err("unwind: Can't create unwind address space.\n");
                return -ENOMEM;
        }
 
-       unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL);
-       thread__set_priv(thread, addr_space);
-
+       unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
        return 0;
 }
 
 void unwind__flush_access(struct thread *thread)
 {
-       unw_addr_space_t addr_space;
-
        if (callchain_param.record_mode != CALLCHAIN_DWARF)
                return;
 
-       addr_space = thread__priv(thread);
-       unw_flush_cache(addr_space, 0, 0);
+       unw_flush_cache(thread->addr_space, 0, 0);
 }
 
 void unwind__finish_access(struct thread *thread)
 {
-       unw_addr_space_t addr_space;
-
        if (callchain_param.record_mode != CALLCHAIN_DWARF)
                return;
 
-       addr_space = thread__priv(thread);
-       unw_destroy_addr_space(addr_space);
+       unw_destroy_addr_space(thread->addr_space);
 }
 
 static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
@@ -639,7 +630,9 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
         * unwind itself.
         */
        if (max_stack - 1 > 0) {
-               addr_space = thread__priv(ui->thread);
+               WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+               addr_space = ui->thread->addr_space;
+
                if (addr_space == NULL)
                        return -1;
 
index b7766c577b015d978fd3e9960c451692f81daa6e..619ba2061b62fcd9574da5c24ad2dadd54581120 100644 (file)
@@ -33,6 +33,8 @@ struct callchain_param        callchain_param = {
 unsigned int page_size;
 int cacheline_size;
 
+unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; /* NOTE(review): presumably mirrors the kernel's perf_event_max_stack sysctl - confirm */
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
@@ -117,6 +119,40 @@ int rm_rf(char *path)
        return rmdir(path);
 }
 
+/* lsdir() filter: rejects every entry whose name starts with '.', keeps the rest. */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+       const char *entry = d->d_name;
+
+       return *entry != '.';
+}
+
+/*
+ * lsdir reads directory @name into a new strlist, skipping entries rejected
+ * by @filter (a NULL filter keeps all). Returns NULL with errno set on failure.
+ */
+struct strlist *lsdir(const char *name,
+                     bool (*filter)(const char *, struct dirent *))
+{
+       struct strlist *list = NULL;
+       struct dirent *d;
+       DIR *dir = opendir(name);
+
+       if (!dir)
+               return NULL;
+
+       list = strlist__new(NULL, NULL);
+       if (!list) {
+               errno = ENOMEM; /* errno values are positive; -ENOMEM was wrong */
+               goto out;
+       }
+       while ((d = readdir(dir)) != NULL) {
+               if (!filter || filter(name, d))
+                       strlist__add(list, d->d_name);
+       }
+out:
+       closedir(dir);
+       return list;
+}
+
 static int slow_copyfile(const char *from, const char *to)
 {
        int err = -1;
index 8298d607c7383a4255f43e42f1eae219189f8209..88f607af1f47036842f6ff77ad2c203b68dcb7c3 100644 (file)
@@ -79,6 +79,7 @@
 #include <termios.h>
 #include <linux/bitops.h>
 #include <termios.h>
+#include "strlist.h"
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
@@ -222,6 +223,8 @@ static inline int sane_case(int x, int high)
 
 int mkdir_p(char *path, mode_t mode);
 int rm_rf(char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
 int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
@@ -254,11 +257,17 @@ int hex2u64(const char *ptr, u64 *val);
 char *ltrim(char *s);
 char *rtrim(char *s);
 
+/* Strip both ends of @s: rtrim() is applied first, then ltrim() on its result. */
+static inline char *trim(char *s)
+{
+       char *tail_trimmed = rtrim(s);
+
+       return ltrim(tail_trimmed);
+}
+
 void dump_stack(void);
 void sighandler_dump_stack(int sig);
 
 extern unsigned int page_size;
 extern int cacheline_size;
+extern unsigned int sysctl_perf_event_max_stack;
 
 struct parse_tag {
        char tag;