select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+ select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
config OUTPUT_FORMAT
If unsure, say Y. Only embedded should say N here.
-config CC_STACKPROTECTOR_ALL
- bool
-
config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
- select CC_STACKPROTECTOR_ALL
---help---
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of functions, a canary value on
#include <linux/math64.h>
#include <linux/init.h>
+ #define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
+ struct perf_event;
+
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
unsigned long fs;
#endif
unsigned long gs;
- /* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
+ /* Save middle states of ptrace breakpoints */
+ struct perf_event *ptrace_bps[HBP_NUM];
+ /* Debug status used for traps, single steps, etc... */
+ unsigned long debugreg6;
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/alternative.h>
+#include <asm/insn.h>
+ #include <asm/debugreg.h>
void jprobe_return_end(void);
/* ----------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
-static const u32 onebyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */
- W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */
- W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */
- W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
- W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */
- W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */
- W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */
- W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
- W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */
- W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */
- W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */
- W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */
- W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
- W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
-static const u32 twobyte_has_modrm[256 / 32] = {
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* ----------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */
- W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */
- W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */
- W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */
- W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */
- W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */
- W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */
- W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */
- W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */
- W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */
- W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */
- W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */
- W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */
- W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */
- W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */
- W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */
- /* ----------------------------------------------- */
- /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
-};
#undef W
struct kretprobe_blackpoint kretprobe_blacklist[] = {
}
}
+/*
+ * Rebuild, in @buf, the instruction bytes that were at @addr before a
+ * kprobe was planted there.
+ *
+ * Returns 0 on success, or -EINVAL when get_kprobe() knows of no kprobe
+ * at @addr.
+ */
+static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+	struct kprobe *probe = get_kprobe((void *)addr);
+
+	if (probe == NULL)
+		return -EINVAL;
+
+	/*
+	 * probe->ainsn.insn normally holds the original instruction, but it
+	 * cannot be used here: a RIP-relative instruction can not be
+	 * single-stepped from a different location, so fix_riprel() rewrites
+	 * the displacement stored in that copy.
+	 *
+	 * The text at probe->addr, on the other hand, is only modified by
+	 * kprobes in its first byte (replaced by int3), and that byte is
+	 * saved in probe->opcode. Copying the text and putting the saved
+	 * byte back therefore yields the original instruction.
+	 */
+	memcpy(buf, probe->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+	buf[0] = probe->opcode;
+
+	return 0;
+}
+
+/* Dummy buffers for kallsyms_lookup */
+static char __dummy_buf[KSYM_NAME_LEN];
+
+/*
+ * Check if paddr is at an instruction boundary.
+ *
+ * Instructions are decoded one after another, starting at paddr minus the
+ * offset reported by kallsyms_lookup() (presumably the start of the symbol
+ * containing paddr), until the decoder reaches or passes paddr.
+ *
+ * Returns non-zero when decoding lands exactly on paddr, 0 otherwise
+ * (including when the lookup fails or a breakpoint byte cannot be
+ * recovered).
+ */
+static int __kprobes can_probe(unsigned long paddr)
+{
+	kprobe_opcode_t copy[MAX_INSN_SIZE];
+	unsigned long cur, sym_off = 0;
+	struct insn insn;
+
+	if (!kallsyms_lookup(paddr, NULL, &sym_off, NULL, __dummy_buf))
+		return 0;
+
+	/* Decode instructions */
+	for (cur = paddr - sym_off; cur < paddr; cur += insn.length) {
+		kernel_insn_init(&insn, (void *)cur);
+		insn_get_opcode(&insn);
+
+		/*
+		 * A breakpoint opcode here may have been planted by another
+		 * kprobe; if so, decode the recovered original instruction
+		 * from our local buffer instead.
+		 */
+		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
+			/*
+			 * Recovery fails when the breakpoint was inserted by
+			 * some other debugging subsystem. In that case, we
+			 * can't recover it.
+			 */
+			if (recover_probed_instruction(copy, cur))
+				return 0;
+			kernel_insn_init(&insn, copy);
+		}
+		insn_get_length(&insn);
+	}
+
+	return (cur == paddr);
+}
+
/*
* Returns non-zero if opcode modifies the interrupt flag.
*/
static void __kprobes fix_riprel(struct kprobe *p)
{
#ifdef CONFIG_X86_64
- u8 *insn = p->ainsn.insn;
- s64 disp;
- int need_modrm;
-
- /* Skip legacy instruction prefixes. */
- while (1) {
- switch (*insn) {
- case 0x66:
- case 0x67:
- case 0x2e:
- case 0x3e:
- case 0x26:
- case 0x64:
- case 0x65:
- case 0x36:
- case 0xf0:
- case 0xf3:
- case 0xf2:
- ++insn;
- continue;
- }
- break;
- }
+ struct insn insn;
+ kernel_insn_init(&insn, p->ainsn.insn);
- /* Skip REX instruction prefix. */
- if (is_REX_prefix(insn))
- ++insn;
-
- if (*insn == 0x0f) {
- /* Two-byte opcode. */
- ++insn;
- need_modrm = test_bit(*insn,
- (unsigned long *)twobyte_has_modrm);
- } else
- /* One-byte opcode. */
- need_modrm = test_bit(*insn,
- (unsigned long *)onebyte_has_modrm);
-
- if (need_modrm) {
- u8 modrm = *++insn;
- if ((modrm & 0xc7) == 0x05) {
- /* %rip+disp32 addressing mode */
- /* Displacement follows ModRM byte. */
- ++insn;
- /*
- * The copied instruction uses the %rip-relative
- * addressing mode. Adjust the displacement for the
- * difference between the original location of this
- * instruction and the location of the copy that will
- * actually be run. The tricky bit here is making sure
- * that the sign extension happens correctly in this
- * calculation, since we need a signed 32-bit result to
- * be sign-extended to 64 bits when it's added to the
- * %rip value and yield the same 64-bit result that the
- * sign-extension of the original signed 32-bit
- * displacement would have given.
- */
- disp = (u8 *) p->addr + *((s32 *) insn) -
- (u8 *) p->ainsn.insn;
- BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
- *(s32 *)insn = (s32) disp;
- }
+ if (insn_rip_relative(&insn)) {
+ s64 newdisp;
+ u8 *disp;
+ insn_get_displacement(&insn);
+ /*
+ * The copied instruction uses the %rip-relative addressing
+ * mode. Adjust the displacement for the difference between
+ * the original location of this instruction and the location
+ * of the copy that will actually be run. The tricky bit here
+ * is making sure that the sign extension happens correctly in
+ * this calculation, since we need a signed 32-bit result to
+ * be sign-extended to 64 bits when it's added to the %rip
+ * value and yield the same 64-bit result that the sign-
+ * extension of the original signed 32-bit displacement would
+ * have given.
+ */
+ newdisp = (u8 *) p->addr + (s64) insn.displacement.value -
+ (u8 *) p->ainsn.insn;
+ BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
+ disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn);
+ *(s32 *) disp = (s32) newdisp;
}
#endif
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
+ if (!can_probe((unsigned long)p->addr))
+ return -EILSEQ;
/* insn: must be on special executable page on x86. */
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
-#ifdef CONFIG_X86_64
- /* TODO: Provide re-entrancy from post_kprobes_handler() and
- * avoid exception stack corruption while single-stepping on
- * the instruction of the new probe.
- */
- arch_disarm_kprobe(p);
- regs->ip = (unsigned long)p->addr;
- reset_current_kprobe();
- preempt_enable_no_resched();
- break;
-#endif
case KPROBE_HIT_ACTIVE:
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_REENTER;
break;
case KPROBE_HIT_SS:
- if (p == kprobe_running()) {
- regs->flags &= ~X86_EFLAGS_TF;
- regs->flags |= kcb->kprobe_saved_flags;
- return 0;
- } else {
- /* A probe has been hit in the codepath leading up
- * to, or just after, single-stepping of a probed
- * instruction. This entire codepath should strictly
- * reside in .kprobes.text section. Raise a warning
- * to highlight this peculiar case.
- */
- }
+ /* A probe has been hit in the codepath leading up to, or just
+ * after, single-stepping of a probed instruction. This entire
+ * codepath should strictly reside in .kprobes.text section.
+ * Raise a BUG or we'll continue in an endless reentering loop
+ * and eventually a stack overflow.
+ */
+ printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
+ p->addr);
+ dump_kprobe(p);
+ BUG();
default:
/* impossible cases */
WARN_ON(1);
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(args->regs)) {
+ /*
+ * Reset the BS bit in dr6 (pointed by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
+ }
break;
case DIE_GPF:
/*
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+ #include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+
return err;
}
*/
if (preload_fpu)
__math_state_restore();
+
return prev_p;
}
return do_arch_prctl(current, code, addr);
}
+/*
+ * Return the stack pointer value to report for @task: the sp saved in
+ * its pt_regs when the task has TIF_IA32 set, otherwise thread.usersp.
+ */
+unsigned long KSTK_ESP(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_IA32))
+		return task_pt_regs(task)->sp;
+
+	return task->thread.usersp;
+}
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
+ #include <linux/perf_event.h>
+ #include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+ #include <asm/hw_breakpoint.h>
#include "tls.h"
REGSET_IOPERM32,
};
+/* One entry of the register-name <-> pt_regs-offset lookup table. */
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+/*
+ * REG_OFFSET_NAME(r) builds an entry whose name is the stringified field
+ * name and whose offset is that field's offset inside struct pt_regs.
+ * REG_OFFSET_END builds the terminating entry, recognised by its NULL name.
+ */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+/*
+ * Table of the pt_regs fields that can be looked up by name. The r8-r15
+ * entries exist only on CONFIG_X86_64 and the ds/es/fs/gs entries only on
+ * CONFIG_X86_32. The table is terminated by REG_OFFSET_END.
+ */
+static const struct pt_regs_offset regoffset_table[] = {
+#ifdef CONFIG_X86_64
+ REG_OFFSET_NAME(r15),
+ REG_OFFSET_NAME(r14),
+ REG_OFFSET_NAME(r13),
+ REG_OFFSET_NAME(r12),
+ REG_OFFSET_NAME(r11),
+ REG_OFFSET_NAME(r10),
+ REG_OFFSET_NAME(r9),
+ REG_OFFSET_NAME(r8),
+#endif
+ REG_OFFSET_NAME(bx),
+ REG_OFFSET_NAME(cx),
+ REG_OFFSET_NAME(dx),
+ REG_OFFSET_NAME(si),
+ REG_OFFSET_NAME(di),
+ REG_OFFSET_NAME(bp),
+ REG_OFFSET_NAME(ax),
+#ifdef CONFIG_X86_32
+ REG_OFFSET_NAME(ds),
+ REG_OFFSET_NAME(es),
+ REG_OFFSET_NAME(fs),
+ REG_OFFSET_NAME(gs),
+#endif
+ REG_OFFSET_NAME(orig_ax),
+ REG_OFFSET_NAME(ip),
+ REG_OFFSET_NAME(cs),
+ REG_OFFSET_NAME(flags),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(ss),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - look up a register's pt_regs offset by name
+ * @name: register name to search for
+ *
+ * Scans regoffset_table for an entry whose name matches @name exactly and
+ * returns that entry's offset within struct pt_regs. Returns -EINVAL when
+ * no entry matches.
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	/* The table ends at the first entry with a NULL name. */
+	while (entry->name != NULL) {
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset;
+		entry++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - look up a register's name by pt_regs offset
+ * @offset: offset of the register inside struct pt_regs
+ *
+ * Scans regoffset_table for an entry whose offset equals @offset and
+ * returns that entry's name. Returns NULL when no entry matches.
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	/* The table ends at the first entry with a NULL name. */
+	while (entry->name != NULL) {
+		if (entry->offset == offset)
+			return entry->name;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * pt_regs offsets of the registers used for the leading function
+ * arguments, indexed by argument number: ax, dx, cx on CONFIG_X86_32 and
+ * di, si, dx, cx, r8, r9 otherwise. Arguments beyond the end of this
+ * table are looked up on the stack by regs_get_argument_nth().
+ */
+static const int arg_offs_table[] = {
+#ifdef CONFIG_X86_32
+ [0] = offsetof(struct pt_regs, ax),
+ [1] = offsetof(struct pt_regs, dx),
+ [2] = offsetof(struct pt_regs, cx)
+#else /* CONFIG_X86_64 */
+ [0] = offsetof(struct pt_regs, di),
+ [1] = offsetof(struct pt_regs, si),
+ [2] = offsetof(struct pt_regs, dx),
+ [3] = offsetof(struct pt_regs, cx),
+ [4] = offsetof(struct pt_regs, r8),
+ [5] = offsetof(struct pt_regs, r9)
+#endif
+};
+
+/**
+ * regs_get_argument_nth() - fetch the Nth argument of a function call
+ * @regs: pt_regs holding the registers at function entry
+ * @n: zero-based argument number
+ *
+ * Arguments covered by arg_offs_table are read from the matching register
+ * slot in @regs. Later arguments are fetched from the kernel stack through
+ * regs_get_kernel_stack_nth(), whose result is returned as is.
+ *
+ * The kernel stack usually changes right after function entry, so this
+ * must be used at function entry.
+ */
+unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
+{
+	const unsigned int nr_reg_args = ARRAY_SIZE(arg_offs_table);
+
+	/*
+	 * The typical case: arg n is on the stack.
+	 * (Note: stack[0] = return address, so skip it)
+	 */
+	if (n >= nr_reg_args)
+		return regs_get_kernel_stack_nth(regs, 1 + (n - nr_reg_args));
+
+	return *(unsigned long *)((char *)regs + arg_offs_table[n]);
+}
+
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
return 0;
}
- static unsigned long debugreg_addr_limit(struct task_struct *task)
- {
- return TASK_SIZE - 3;
- }
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
return 0;
}
- static unsigned long debugreg_addr_limit(struct task_struct *task)
- {
- #ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
- #endif
- return TASK_SIZE_MAX - 7;
- }
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
return ret;
}
+ /*
+ * Callback run when a ptrace-installed hardware breakpoint triggers.
+ *
+ * @bp: the breakpoint event that fired
+ * @data: not used by this callback
+ *
+ * Finds the slot of current's ptrace_bps[] that holds @bp and sets the
+ * matching trap bit (DR_TRAP0 << slot) in the thread's virtual DR6, so
+ * the thread's debugger will see the hit.
+ *
+ * If @bp is not present in any slot, nothing is recorded: shifting by
+ * HBP_NUM would set a DR6 bit that belongs to no breakpoint slot.
+ */
+ static void ptrace_triggered(struct perf_event *bp, void *data)
+ {
+	struct thread_struct *thread = &(current->thread);
+	int i;
+
+	for (i = 0; i < HBP_NUM; i++) {
+		if (thread->ptrace_bps[i] == bp) {
+			thread->debugreg6 |= (DR_TRAP0 << i);
+			return;
+		}
+	}
+ }
+
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoints for this thread and
+ * build the dr7 value on top of their attributes.
+ *
*/
- static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
+ int i;
+ int dr7 = 0;
+ struct arch_hw_breakpoint *info;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ if (bp[i] && !bp[i]->attr.disabled) {
+ info = counter_arch_bp(bp[i]);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ }
}
- return 0;
+
+ return dr7;
}
- static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+ /*
+ * Handle ptrace writes to debug register 7.
+ */
+ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
- int i;
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long old_dr7;
+ int i, orig_ret = 0, rc = 0;
+ int enabled, second_pass = 0;
+ unsigned len, type;
+ int gen_len, gen_type;
+ struct perf_event *bp;
+
+ data &= ~DR_CONTROL_RESERVED;
+ old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+ restore:
+ /*
+ * Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ enabled = decode_dr7(data, i, &len, &type);
+ bp = thread->ptrace_bps[i];
+
+ if (!enabled) {
+ if (bp) {
+ /*
+ * Don't unregister the breakpoints right-away,
+ * unless all register_user_hw_breakpoint()
+ * requests have succeeded. This prevents
+ * any window of opportunity for debug
+ * register grabbing by other users.
+ */
+ if (!second_pass)
+ continue;
+ thread->ptrace_bps[i] = NULL;
+ unregister_hw_breakpoint(bp);
+ }
+ continue;
+ }
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /*
+ * We should have at least an inactive breakpoint at this
+ * slot. It means the user is writing dr7 without having
+ * written the address register first
+ */
+ if (!bp) {
+ rc = -EINVAL;
+ break;
+ }
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ if (rc)
+ break;
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+ /*
+ * This is a temporary thing as bp is unregistered/registered
+ * to simulate modification
+ */
+ bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+ gen_type, bp->callback,
+ tsk, true);
+ thread->ptrace_bps[i] = NULL;
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+ if (!bp) { /* incorrect bp, or we have a bug in bp API */
+ rc = -EINVAL;
+ break;
+ }
+ if (IS_ERR(bp)) {
+ rc = PTR_ERR(bp);
+ bp = NULL;
+ break;
+ }
+ thread->ptrace_bps[i] = bp;
+ }
+ /*
+ * Make a second pass to free the remaining unused breakpoints
+ * or to restore the original breakpoints if an error occurred.
+ */
+ if (!second_pass) {
+ second_pass = 1;
+ if (rc < 0) {
+ orig_ret = rc;
+ data = old_dr7;
+ }
+ goto restore;
+ }
+ return ((orig_ret < 0) ? orig_ret : rc);
+ }
- case 7:
+ /*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ *
+ * For n below HBP_NUM the address stored in the matching ptrace
+ * breakpoint is returned (0 when the slot is empty). n == 6 yields the
+ * thread's virtual DR6 and n == 7 the DR7 value built from the thread's
+ * ptrace breakpoints. Any other n yields 0.
+ */
+ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+ {
+	struct thread_struct *thread = &(tsk->thread);
+	struct perf_event *bp;
+
+	if (n >= HBP_NUM) {
+		if (n == 6)
+			return thread->debugreg6;
+		if (n == 7)
+			return ptrace_get_dr7(thread->ptrace_bps);
+		return 0;
+	}
+
+	bp = thread->ptrace_bps[n];
+	if (!bp)
+		return 0;
+
+	return bp->hw.info.address;
+ }
+
+ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+ unsigned long addr)
+ {
+ struct perf_event *bp;
+ struct thread_struct *t = &tsk->thread;
+
+ if (!t->ptrace_bps[nr]) {
/*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
+ * Put stub len and type to register (reserve) an inactive but
+ * correct bp
*/
- #ifdef CONFIG_X86_32
- #define DR7_MASK 0x5f54
- #else
- #define DR7_MASK 0x5554
- #endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+ bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+ HW_BREAKPOINT_W,
+ ptrace_triggered, tsk,
+ false);
+ } else {
+ bp = t->ptrace_bps[nr];
+ t->ptrace_bps[nr] = NULL;
+ bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+ bp->attr.bp_type,
+ bp->callback,
+ tsk,
+ bp->attr.disabled);
}
+ if (!bp)
+ return -EIO;
+ /*
+ * CHECKME: the previous code returned -EIO if the addr wasn't a
+ * valid task virtual addr. The new one will return -EINVAL in this
+ * case.
+ * -EINVAL may be what we want for in-kernel breakpoints users, but
+ * -EIO looks better for ptrace, since we refuse a register writing
+ * for the user. And anyway this is the previous behaviour.
+ */
+ if (IS_ERR(bp))
+ return PTR_ERR(bp);
+
+ t->ptrace_bps[nr] = bp;
+
return 0;
}
+ /*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ *
+ * Returns -EIO for n == 4 or n == 5, stores val in the thread's virtual
+ * DR6 for n == 6, forwards to ptrace_set_breakpoint_addr() for n below
+ * HBP_NUM and to ptrace_write_dr7() for n == 7. Any other n is accepted
+ * and ignored (returns 0).
+ */
+ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+ {
+	/* There are no DR4 or DR5 registers */
+	if (n == 4 || n == 5)
+		return -EIO;
+
+	if (n == 6) {
+		tsk->thread.debugreg6 = val;
+		return 0;
+	}
+
+	/* Address registers */
+	if (n < HBP_NUM)
+		return ptrace_set_breakpoint_addr(tsk, n, val);
+
+	/* All that's left is DR7 */
+	if (n == 7)
+		return ptrace_write_dr7(tsk, val);
+
+	return 0;
+ }
+
/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
#define CREATE_TRACE_POINTS
#include "trace.h"
+ #include <asm/debugreg.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
- if (!bank_num)
+ if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
goto out;
if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
goto out;
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run);
- if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
- }
+ /*
+ * If the guest has used debug registers, at least dr7
+ * will be disabled while returning to the host.
+ * If we don't have active breakpoints in the host, we don't
+ * care about the messed up debug address registers. But if
+ * we have some of them active, restore the old state.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_restore();
set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable();
return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
}
-static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
struct desc_struct *seg_desc)
{
u32 base_addr = get_desc_base(seg_desc);
#include <linux/ioctl.h>
#include <asm/byteorder.h>
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+ #include <asm/hw_breakpoint.h>
+ #endif
+
/*
* User-space ABI bits:
*/
PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
PERF_TYPE_MAX, /* non-ABI */
};
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
PERF_COUNT_SW_MAX, /* non-ABI */
};
__u32 wakeup_events; /* wakeup every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
+
+ union {
+ struct { /* Hardware breakpoint info */
+ __u64 bp_addr;
+ __u32 bp_type;
+ __u32 bp_len;
+ };
+ };
+
__u32 __reserved_2;
__u64 __reserved_3;
#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64)
+#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
unsigned long event_base;
int idx;
};
- union { /* software */
- atomic64_t count;
+ struct { /* software */
+ s64 remaining;
struct hrtimer hrtimer;
};
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+ union { /* breakpoint */
+ struct arch_hw_breakpoint info;
+ };
+ #endif
};
atomic64_t prev_count;
u64 sample_period;
void (*func)(struct perf_pending_entry *);
};
+ typedef void (*perf_callback_t)(struct perf_event *, void *);
+
/**
* struct perf_event - performance event kernel representation:
*/
u64 tstamp_running;
u64 tstamp_stopped;
- struct perf_event_attr attr;
+ struct perf_event_attr attr;
struct hw_perf_event hw;
struct perf_event_context *ctx;
struct event_filter *filter;
#endif
+ perf_callback_t callback;
+
+ perf_callback_t event_callback;
+
#endif /* CONFIG_PERF_EVENTS */
};
int nmi;
int sample;
int locked;
- unsigned long flags;
};
#ifdef CONFIG_PERF_EVENTS
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx, int cpu);
extern void perf_event_update_userpage(struct perf_event *event);
+ extern int perf_event_release_kernel(struct perf_event *event);
+ extern struct perf_event *
+ perf_event_create_kernel_counter(struct perf_event_attr *attr,
+ int cpu,
+ pid_t pid,
+ perf_callback_t callback);
+ extern u64 perf_event_read_value(struct perf_event *event);
struct perf_sample_data {
u64 type;
extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count,
void *record, int entry_size);
+ extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
+ static inline void
+ perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
#include <linux/init_task.h>
#include <linux/perf_event.h>
#include <trace/events/sched.h>
+ #include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
{
struct task_struct *curr = current->group_leader;
- if (task_session(curr) != pid) {
+ if (task_session(curr) != pid)
change_pid(curr, PIDTYPE_SID, pid);
- proc_sid_connector(curr);
- }
if (task_pgrp(curr) != pid)
change_pid(curr, PIDTYPE_PGID, pid);
proc_exit_connector(tsk);
+ /*
+ * FIXME: do that only when needed, using sched_exit tracepoint
+ */
+ flush_ptrace_hw_breakpoint(tsk);
/*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
+ #include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
u64 interrupts, freq;
spin_lock(&ctx->lock);
- list_for_each_entry(event, &ctx->group_list, group_entry) {
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
return 0;
}
+ /*
+ * Release an event: detach it from its context and from its owner's
+ * event list, drop the reference held on the owner task, then free the
+ * event. Always returns 0.
+ */
+ int perf_event_release_kernel(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+
+ /* Warn (once) if the event's context has a parent context. */
+ WARN_ON_ONCE(ctx->parent_ctx);
+ /* Removal from the context is done under the context mutex. */
+ mutex_lock(&ctx->mutex);
+ perf_event_remove_from_context(event);
+ mutex_unlock(&ctx->mutex);
+
+ /* Unlink from the owner's list under the owner's perf_event_mutex. */
+ mutex_lock(&event->owner->perf_event_mutex);
+ list_del_init(&event->owner_entry);
+ mutex_unlock(&event->owner->perf_event_mutex);
+ put_task_struct(event->owner);
+
+ free_event(event);
+
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
static int perf_event_read_size(struct perf_event *event)
{
int entry = sizeof(u64); /* value */
return size;
}
- static u64 perf_event_read_value(struct perf_event *event)
+ u64 perf_event_read_value(struct perf_event *event)
{
struct perf_event *child;
u64 total = 0;
return total;
}
+ EXPORT_SYMBOL_GPL(perf_event_read_value);
static int perf_event_read_entry(struct perf_event *event,
u64 read_format, char __user *buf)
static void perf_output_lock(struct perf_output_handle *handle)
{
struct perf_mmap_data *data = handle->data;
- int cpu;
+ int cur, cpu = get_cpu();
handle->locked = 0;
- local_irq_save(handle->flags);
- cpu = smp_processor_id();
-
- if (in_nmi() && atomic_read(&data->lock) == cpu)
- return;
+ for (;;) {
+ cur = atomic_cmpxchg(&data->lock, -1, cpu);
+ if (cur == -1) {
+ handle->locked = 1;
+ break;
+ }
+ if (cur == cpu)
+ break;
- while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
cpu_relax();
-
- handle->locked = 1;
+ }
}
static void perf_output_unlock(struct perf_output_handle *handle)
if (atomic_xchg(&data->wakeup, 0))
perf_output_wakeup(handle);
out:
- local_irq_restore(handle->flags);
+ put_cpu();
}
void perf_output_copy(struct perf_output_handle *handle,
regs = task_pt_regs(current);
if (regs) {
- if (perf_event_overflow(event, 0, &data, regs))
- ret = HRTIMER_NORESTART;
+ if (!(event->attr.exclude_idle && current->pid == 0))
+ if (perf_event_overflow(event, 0, &data, regs))
+ ret = HRTIMER_NORESTART;
}
period = max_t(u64, 10000, event->hw.sample_period);
return ret;
}
+/*
+ * Arm the sampling hrtimer of a software event.
+ *
+ * The timer is always initialised with perf_swevent_hrtimer as handler,
+ * but it is only started when the event has a sample period.  A leftover
+ * interval recorded in hwc->remaining is consumed once (a negative
+ * leftover is replaced by 10000 ns) and then cleared; without a leftover
+ * the interval is the sample period, with a floor of 10000 ns.
+ */
+static void perf_swevent_start_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 period;
+
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hwc->hrtimer.function = perf_swevent_hrtimer;
+
+	/* Non-sampling events never start the timer. */
+	if (!hwc->sample_period)
+		return;
+
+	if (!hwc->remaining) {
+		period = max_t(u64, 10000, hwc->sample_period);
+	} else {
+		period = hwc->remaining < 0 ? 10000 : hwc->remaining;
+		hwc->remaining = 0;
+	}
+
+	__hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(period), 0,
+				 HRTIMER_MODE_REL, 0);
+}
+
+/*
+ * Stop the sampling hrtimer of a software event.
+ *
+ * For events with a sample period, the time still left on the timer is
+ * stored (in ns) in hwc->remaining before the timer is cancelled.
+ * Events without a sample period are left untouched.
+ */
+static void perf_swevent_cancel_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!hwc->sample_period)
+		return;
+
+	hwc->remaining = ktime_to_ns(hrtimer_get_remaining(&hwc->hrtimer));
+	hrtimer_cancel(&hwc->hrtimer);
+}
+
/*
* Software event: cpu wall time clock
*/
int cpu = raw_smp_processor_id();
atomic64_set(&hwc->prev_count, cpu_clock(cpu));
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hwc->hrtimer.function = perf_swevent_hrtimer;
- if (hwc->sample_period) {
- u64 period = max_t(u64, 10000, hwc->sample_period);
- __hrtimer_start_range_ns(&hwc->hrtimer,
- ns_to_ktime(period), 0,
- HRTIMER_MODE_REL, 0);
- }
+ perf_swevent_start_hrtimer(event);
return 0;
}
static void cpu_clock_perf_event_disable(struct perf_event *event)
{
- if (event->hw.sample_period)
- hrtimer_cancel(&event->hw.hrtimer);
+ perf_swevent_cancel_hrtimer(event); /* for sampling events: records the time left in hw.remaining, then cancels the hrtimer */
cpu_clock_perf_event_update(event);
}
now = event->ctx->time;
atomic64_set(&hwc->prev_count, now);
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hwc->hrtimer.function = perf_swevent_hrtimer;
- if (hwc->sample_period) {
- u64 period = max_t(u64, 10000, hwc->sample_period);
- __hrtimer_start_range_ns(&hwc->hrtimer,
- ns_to_ktime(period), 0,
- HRTIMER_MODE_REL, 0);
- }
+
+ perf_swevent_start_hrtimer(event);
return 0;
}
static void task_clock_perf_event_disable(struct perf_event *event)
{
- if (event->hw.sample_period)
- hrtimer_cancel(&event->hw.hrtimer);
+ perf_swevent_cancel_hrtimer(event); /* for sampling events: records the time left in hw.remaining, then cancels the hrtimer */
task_clock_perf_event_update(event, event->ctx->time);
}
#endif /* CONFIG_EVENT_PROFILE */
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+ static void bp_perf_event_destroy(struct perf_event *event)
+ { /* installed as bp->destroy by bp_perf_event_init(); passes the event to release_bp_slot() */
+ release_bp_slot(event);
+ }
+
+ /*
+  * Set up a PERF_TYPE_BREAKPOINT event.
+  *
+  * Returns &perf_ops_bp on success, or an ERR_PTR() carrying the error
+  * reported by the breakpoint registration.
+  */
+ static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+ {
+ 	int err;
+
+ 	/*
+ 	 * The breakpoint is already filled in when the counter was not
+ 	 * created through the perf syscall; such events carry a callback
+ 	 * and use the __register variant.
+ 	 * FIXME: make sure the callback is NULL when the event comes from
+ 	 * the syscall path.
+ 	 */
+ 	if (bp->callback)
+ 		err = __register_perf_hw_breakpoint(bp);
+ 	else
+ 		err = register_perf_hw_breakpoint(bp);
+
+ 	if (err)
+ 		return ERR_PTR(err);
+
+ 	bp->destroy = bp_perf_event_destroy;
+ 	return &perf_ops_bp;
+ }
+
+ void perf_bp_event(struct perf_event *bp, void *regs)
+ {
+ /* TODO: not implemented yet; bp and regs are currently ignored */
+ }
+ #else
+ static void bp_perf_event_destroy(struct perf_event *event)
+ { /* stub used when CONFIG_HAVE_HW_BREAKPOINT is not set: does nothing */
+ }
+
+ static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+ { /* stub used when CONFIG_HAVE_HW_BREAKPOINT is not set: no pmu is provided */
+ return NULL;
+ }
+
+ void perf_bp_event(struct perf_event *bp, void *regs)
+ { /* stub used when CONFIG_HAVE_HW_BREAKPOINT is not set: does nothing */
+ }
+ #endif
+
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
static void sw_perf_event_destroy(struct perf_event *event)
case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
case PERF_COUNT_SW_CONTEXT_SWITCHES:
case PERF_COUNT_SW_CPU_MIGRATIONS:
+ case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+ case PERF_COUNT_SW_EMULATION_FAULTS:
if (!event->parent) {
atomic_inc(&perf_swevent_enabled[event_id]);
event->destroy = sw_perf_event_destroy;
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
+ perf_callback_t callback,
gfp_t gfpflags)
{
const struct pmu *pmu;
event->state = PERF_EVENT_STATE_INACTIVE;
+ if (!callback && parent_event)
+ callback = parent_event->callback;
+
+ event->callback = callback;
+
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
pmu = tp_perf_event_init(event);
break;
+ case PERF_TYPE_BREAKPOINT:
+ pmu = bp_perf_event_init(event);
+ break;
+
+
default:
break;
}
}
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
- NULL, GFP_KERNEL);
+ NULL, NULL, GFP_KERNEL);
err = PTR_ERR(event);
if (IS_ERR(event))
goto err_put_context;
return err;
}
+ /**
+  * perf_event_create_kernel_counter - create and install an event from kernel code
+  *
+  * @attr: attributes of the counter to create
+  * @cpu: cpu in which the counter is bound
+  * @pid: task to profile
+  * @callback: callback handed to perf_event_alloc() and stored in the new event
+  *
+  * The event is installed in the context looked up for (@pid, @cpu) and
+  * linked on current's perf_event_list, with current recorded as owner.
+  *
+  * Returns the new event, or NULL when either the context lookup or the
+  * event allocation fails.
+  */
+ struct perf_event *
+ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+ 				 pid_t pid, perf_callback_t callback)
+ {
+ 	struct perf_event *event;
+ 	struct perf_event_context *ctx;
+
+ 	/*
+ 	 * Get the target context (task or percpu):
+ 	 */
+ 	ctx = find_get_context(pid, cpu);
+ 	if (IS_ERR(ctx))
+ 		return NULL;
+
+ 	event = perf_event_alloc(attr, cpu, ctx, NULL,
+ 				 NULL, callback, GFP_KERNEL);
+ 	if (IS_ERR(event)) {
+ 		/* Release the context obtained from find_get_context(). */
+ 		put_ctx(ctx);
+ 		return NULL;
+ 	}
+
+ 	/* Kernel counters have no file backing them. */
+ 	event->filp = NULL;
+ 	WARN_ON_ONCE(ctx->parent_ctx);
+ 	mutex_lock(&ctx->mutex);
+ 	perf_install_in_context(ctx, event, cpu);
+ 	++ctx->generation;
+ 	mutex_unlock(&ctx->mutex);
+
+ 	/*
+ 	 * Record current as owner; the task reference taken here is the
+ 	 * one perf_event_release_kernel() drops with put_task_struct().
+ 	 */
+ 	event->owner = current;
+ 	get_task_struct(current);
+ 	mutex_lock(&current->perf_event_mutex);
+ 	list_add_tail(&event->owner_entry, &current->perf_event_list);
+ 	mutex_unlock(&current->perf_event_mutex);
+
+ 	return event;
+ }
+ EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
/*
 * inherit an event from parent task to child task:
*/
child_event = perf_event_alloc(&parent_event->attr,
parent_event->cpu, child_ctx,
group_leader, parent_event,
- GFP_KERNEL);
+ NULL, GFP_KERNEL);
if (IS_ERR(child_event))
return child_event;
get_ctx(child_ctx);
power management decisions, specifically the C-state and P-state
behavior.
+ config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write accesses to any given kernel
+ symbol, i.e. any symbol listed in /proc/kallsyms.
+
+ config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
If unsure, say N.
+config KPROBE_EVENT
+ depends on KPROBES
+ depends on X86
+ bool "Enable kprobes-based dynamic events"
+ select TRACING
+ default y
+ help
+ This allows the user to add tracing events (similar to tracepoints) on the fly
+ via the ftrace interface. See Documentation/trace/kprobetrace.txt
+ for more details.
+
+ Those events can be inserted wherever kprobes can probe, and record
+ various register and memory values.
+
+ This option is also required by the perf-probe subcommand of the perf tools.
+ If you want to use the perf tools, this option is strongly recommended.
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FUNCTION_TRACER
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+ obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
+ #include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
unsigned long ret;
};
+struct kprobe_trace_entry {
+ struct trace_entry ent;
+ unsigned long ip; /* presumably the address of the probed instruction -- TODO confirm */
+ int nargs; /* presumably the number of valid slots in args[] -- TODO confirm */
+ unsigned long args[]; /* flexible array member; size it with SIZEOF_KPROBE_TRACE_ENTRY() */
+};
+ /* Size in bytes of a struct kprobe_trace_entry that carries n args[] slots. */
+#define SIZEOF_KPROBE_TRACE_ENTRY(n) \
+ (offsetof(struct kprobe_trace_entry, args) + \
+ (sizeof(unsigned long) * (n)))
+
+struct kretprobe_trace_entry {
+ struct trace_entry ent;
+ unsigned long func; /* presumably the address of the probed function -- TODO confirm */
+ unsigned long ret_ip; /* presumably the address the function returns to -- TODO confirm */
+ int nargs; /* presumably the number of valid slots in args[] -- TODO confirm */
+ unsigned long args[]; /* flexible array member; size it with SIZEOF_KRETPROBE_TRACE_ENTRY() */
+};
+ /* Size in bytes of a struct kretprobe_trace_entry that carries n args[] slots. */
+#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
+ (offsetof(struct kretprobe_trace_entry, args) + \
+ (sizeof(unsigned long) * (n)))
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
void unregister_tracer(struct tracer *type);
int is_tracing_stopped(void);
+ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
#ifdef CONFIG_TRACER_MAX_TRACE
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
+ extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);