/*
- * File: arch/blackfin/mach-common/entry.S
- * Based on:
- * Author: Linus Torvalds
+ * Contains the system-call and fault low-level handling routines.
+ * This also contains the timer-interrupt handler, as well as all
+ * interrupts and faults that can result in a task-switch.
*
- * Created: ?
- * Description: contains the system-call and fault low-level handling routines.
- * This also contains the timer-interrupt handler, as well as all
- * interrupts and faults that can result in a task-switch.
+ * Copyright 2005-2009 Analog Devices Inc.
*
- * Modified:
- * Copyright 2004-2006 Analog Devices Inc.
- *
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * Licensed under the GPL-2 or later.
*/
/* NOTE: This code handles signal-recognition, which happens every time
cc = BITTST(r7, TIF_RESTORE_SIGMASK);
if cc jump .Lsyscall_do_signals;
cc = BITTST(r7, TIF_SIGPENDING);
+ if cc jump .Lsyscall_do_signals;
+ cc = BITTST(r7, TIF_NOTIFY_RESUME);
if !cc jump .Lsyscall_really_exit;
.Lsyscall_do_signals:
/* Reenable interrupts. */
r0 = sp;
SP += -12;
- call _do_signal;
+ call _do_notify_resume;
SP += 12;
.Lsyscall_really_exit:
ENTRY(_ret_from_exception)
#ifdef CONFIG_IPIPE
- p2.l = _per_cpu__ipipe_percpu_domain;
- p2.h = _per_cpu__ipipe_percpu_domain;
+ p2.l = _ipipe_percpu_domain;
+ p2.h = _ipipe_percpu_domain;
r0.l = _ipipe_root;
r0.h = _ipipe_root;
r2 = [p2];
.long _sys_ni_syscall /* streams2 */
.long _sys_vfork /* 190 */
.long _sys_getrlimit
- .long _sys_mmap2
+ .long _sys_mmap_pgoff
.long _sys_truncate64
.long _sys_ftruncate64
.long _sys_stat64 /* 195 */
.long _sys_pwritev
.long _sys_rt_tgsigqueueinfo
.long _sys_perf_event_open
+ .long _sys_recvmmsg /* 370 */
.rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
+ extern void show_regs_common(void);
#ifdef CONFIG_X86_32
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
- , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
+ , [stack_canary] "=m" (stack_canary.canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
#define __switch_canary_oparam \
- , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+ , [gs_canary] "=m" (irq_stack_union.stack_canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
- ".globl thread_return\n" \
- "thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
"movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
"jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[_tif_fork] "i" (_TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (per_cpu_var(current_task)) \
+ [current_task] "m" (current_task) \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
* Load a segment. Fall back on loading the zero
* segment if something goes wrong..
*/
- #define loadsegment(seg, value) \
- asm volatile("\n" \
- "1:\t" \
- "movl %k0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "movl %k1, %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b,3b) \
- : :"r" (value), "r" (0) : "memory")
-
+ #define loadsegment(seg, value) \
+ do { \
+ unsigned short __val = (value); \
+ \
+ asm volatile(" \n" \
+ "1: movl %k0,%%" #seg " \n" \
+ \
+ ".section .fixup,\"ax\" \n" \
+ "2: xorl %k0,%k0 \n" \
+ " jmp 1b \n" \
+ ".previous \n" \
+ \
+ _ASM_EXTABLE(1b, 2b) \
+ \
+ : "+r" (__val) : : "memory"); \
+ } while (0)
/*
* Save a segment register away
int unknown_nmi_panic;
int nmi_watchdog_enabled;
- static cpumask_t backtrace_mask __read_mostly;
+ /* For reliability, we're prepared to waste bits here. */
+ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
- if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
show_regs(regs);
dump_stack();
spin_unlock(&lock);
- cpumask_clear_cpu(cpu, &backtrace_mask);
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
rc = 1;
}
* Ayiee, looks like this CPU is stuck ...
* wait a few IRQs (5 seconds) before doing the oops ...
*/
- __this_cpu_inc(per_cpu_var(alert_counter));
- if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
+ __this_cpu_inc(alert_counter);
+ if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
/*
* die_nmi will return ONLY if NOTIFY_STOP happens..
*/
regs, panic_on_timeout);
} else {
__get_cpu_var(last_irq_sum) = sum;
- __this_cpu_write(per_cpu_var(alert_counter), 0);
+ __this_cpu_write(alert_counter, 0);
}
/* see if the nmi watchdog went off */
{
int i;
- cpumask_copy(&backtrace_mask, cpu_online_mask);
+ cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
apic->send_IPI_all(NMI_VECTOR);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(&backtrace_mask))
+ if (cpumask_empty(to_cpumask(backtrace_mask)))
break;
mdelay(1);
}
#include <asm/asm-offsets.h>
#include <asm/setup.h>
#include <asm/processor-flags.h>
+ #include <asm/msr-index.h>
+ #include <asm/cpufeature.h>
#include <asm/percpu.h>
/* Physical address */
orl %edx,%eax
movl %eax,%cr4
- btl $5, %eax # check if PAE is enabled
- jnc 6f
+ testb $X86_CR4_PAE, %al # check if PAE is enabled
+ jz 6f
/* Check if extended functions are implemented */
movl $0x80000000, %eax
cpuid
- cmpl $0x80000000, %eax
- jbe 6f
+ /* Value must be in the range 0x80000001 to 0x8000ffff */
+ subl $0x80000001, %eax
+ cmpl $(0x8000ffff-0x80000001), %eax
+ ja 6f
mov $0x80000001, %eax
cpuid
/* Execute Disable bit supported? */
- btl $20, %edx
+ btl $(X86_FEATURE_NX & 31), %edx
jnc 6f
/* Setup EFER (Extended Feature Enable Register) */
- movl $0xc0000080, %ecx
+ movl $MSR_EFER, %ecx
rdmsr
- btsl $11, %eax
+ btsl $_EFER_NX, %eax
/* Make changes effective */
wrmsr
*/
cmpb $0,ready
jne 1f
- movl $per_cpu__gdt_page,%eax
- movl $per_cpu__stack_canary,%ecx
+ movl $gdt_page,%eax
+ movl $stack_canary,%ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
.word 0 # 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
.word GDT_ENTRIES*8-1
- .long per_cpu__gdt_page /* Overwritten for secondary CPUs */
+ .long gdt_page /* Overwritten for secondary CPUs */
/*
* The boot_gdt must mirror the equivalent in setup.S and is
jiffies_64 = jiffies;
#endif
+ #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+ /*
+ * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
+ * we retain large page mappings for boundaries spanning kernel text, rodata
+ * and data sections.
+ *
+ * However, kernel identity mappings will have different RWX permissions
+ * to the pages mapping to text and to the pages padding (which are freed) the
+ * text section. Hence kernel identity mappings will be broken to smaller
+ * pages. For 64-bit, kernel text and kernel identity mappings are different,
+ * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
+ * as well as retain 2MB large page mappings for kernel text.
+ */
+ #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
+
+ #define X64_ALIGN_DEBUG_RODATA_END \
+ . = ALIGN(HPAGE_SIZE); \
+ __end_rodata_hpage_align = .;
+
+ #else
+
+ #define X64_ALIGN_DEBUG_RODATA_BEGIN
+ #define X64_ALIGN_DEBUG_RODATA_END
+
+ #endif
+
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
EXCEPTION_TABLE(16) :text = 0x9090
+ X64_ALIGN_DEBUG_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
+ X64_ALIGN_DEBUG_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)
- CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
+ CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
DATA_DATA
CONSTRUCTORS
/* rarely changed data like cpu maps */
- READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
+ READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
/* End of data section */
_edata = .;
*(.vsyscall_0)
} :user
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
*(.vsyscall_gtod_data)
}
}
vgetcpu_mode = VVIRT(.vgetcpu_mode);
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(L1_CACHE_BYTES);
.jiffies : AT(VLOAD(.jiffies)) {
*(.jiffies)
}
__brk_limit = .;
}
- .end : AT(ADDR(.end) - LOAD_OFFSET) {
- _end = .;
- }
+ _end = .;
STABS_DEBUG
DWARF_DEBUG
#ifdef CONFIG_X86_32
+ /*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
#else
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-. = ASSERT((per_cpu__irq_stack_union == 0),
+. = ASSERT((irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
#ifdef __CHECKER__
# define __user __attribute__((noderef, address_space(1)))
-# define __kernel /* default address space */
+# define __kernel __attribute__((address_space(0)))
# define __safe __attribute__((safe))
# define __force __attribute__((force))
# define __nocast __attribute__((nocast))
# define __acquire(x) __context__(x,1)
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu __attribute__((noderef, address_space(3)))
extern void __chk_user_ptr(const volatile void __user *);
extern void __chk_io_ptr(const volatile void __iomem *);
#else
# define __acquire(x) (void)0
# define __release(x) (void)0
# define __cond_lock(x,c) (c)
+# define __percpu
#endif
#ifdef __KERNEL__
# define barrier() __memory_barrier()
#endif
+ /* Unreachable code */
+ #ifndef unreachable
+ # define unreachable() do { } while (1)
+ #endif
+
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
# define __maybe_unused /* unimplemented */
#endif
+ #ifndef __always_unused
+ # define __always_unused /* unimplemented */
+ #endif
+
#ifndef noinline
#define noinline
#endif
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
+ /* Compile time object size, -1 for unknown */
+ #ifndef __compiletime_object_size
+ # define __compiletime_object_size(obj) -1
+ #endif
+ #ifndef __compiletime_warning
+ # define __compiletime_warning(message)
+ #endif
+ #ifndef __compiletime_error
+ # define __compiletime_error(message)
+ #endif
+
/*
* Prevent the compiler from merging or refetching accesses. The compiler
* is also forbidden from reordering successive instances of ACCESS_ONCE(),
* we force a syntax error here if it isn't.
*/
#define get_cpu_var(var) (*({ \
- extern int simple_identifier_##var(void); \
preempt_disable(); \
&__get_cpu_var(var); }))
-#define put_cpu_var(var) preempt_enable()
+
+/*
+ * The weird & is necessary because sparse considers (void)(var) to be
+ * a direct dereference of percpu variable (var).
+ */
+#define put_cpu_var(var) do { \
+ (void)&(var); \
+ preempt_enable(); \
+} while (0)
#ifdef CONFIG_SMP
*/
#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
-extern void *__alloc_reserved_percpu(size_t size, size_t align);
-extern void *__alloc_percpu(size_t size, size_t align);
-extern void free_percpu(void *__pdata);
+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_percpu(size_t size, size_t align);
+extern void free_percpu(void __percpu *__pdata);
+ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void __init setup_per_cpu_areas(void);
#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
-static inline void *__alloc_percpu(size_t size, size_t align)
+static inline void __percpu *__alloc_percpu(size_t size, size_t align)
{
/*
* Can't easily make larger alignment work with kmalloc. WARN
return kzalloc(size, GFP_KERNEL);
}
-static inline void free_percpu(void *p)
+static inline void free_percpu(void __percpu *p)
{
kfree(p);
}
+ static inline phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+ return __pa(addr);
+ }
+
static inline void __init setup_per_cpu_areas(void) { }
static inline void *pcpu_lpage_remapped(void *kaddr)
#endif /* CONFIG_SMP */
#define alloc_percpu(type) \
- (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
+ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
/*
* Optional methods for optimized non-lvalue per-cpu variable access.
#ifndef percpu_read
# define percpu_read(var) \
({ \
- typeof(per_cpu_var(var)) __tmp_var__; \
- __tmp_var__ = get_cpu_var(var); \
- put_cpu_var(var); \
- __tmp_var__; \
+ typeof(var) *pr_ptr__ = &(var); \
+ typeof(var) pr_ret__; \
+ pr_ret__ = get_cpu_var(*pr_ptr__); \
+ put_cpu_var(*pr_ptr__); \
+ pr_ret__; \
})
#endif
#define __percpu_generic_to_op(var, val, op) \
do { \
- get_cpu_var(var) op val; \
- put_cpu_var(var); \
+ typeof(var) *pgto_ptr__ = &(var); \
+ get_cpu_var(*pgto_ptr__) op val; \
+ put_cpu_var(*pgto_ptr__); \
} while (0)
#ifndef percpu_write
#define __pcpu_size_call_return(stem, variable) \
({ typeof(variable) pscr_ret__; \
+ __verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: pscr_ret__ = stem##1(variable);break; \
case 2: pscr_ret__ = stem##2(variable);break; \
#define __pcpu_size_call(stem, variable, ...) \
do { \
+ __verify_pcpu_ptr(&(variable)); \
switch(sizeof(variable)) { \
case 1: stem##1(variable, __VA_ARGS__);break; \
case 2: stem##2(variable, __VA_ARGS__);break; \
/*
* Optimized manipulation for memory allocated through the per cpu
- * allocator or for addresses of per cpu variables (can be determined
- * using per_cpu_var(xx).
+ * allocator or for addresses of per cpu variables.
*
* These operation guarantee exclusivity of access for other operations
* on the *same* processor. The assumption is that per cpu data is only
#define _this_cpu_generic_to_op(pcp, val, op) \
do { \
preempt_disable(); \
- *__this_cpu_ptr(&pcp) op val; \
+ *__this_cpu_ptr(&(pcp)) op val; \
preempt_enable(); \
} while (0)
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+ KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
+ KSWAPD_SKIP_CONGESTION_WAIT,
PAGEOUTRUN, ALLOCSTALL, PGROTATED,
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
static inline void __count_vm_event(enum vm_event_item item)
{
- __this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+ __this_cpu_inc(vm_event_states.event[item]);
}
static inline void count_vm_event(enum vm_event_item item)
{
- this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
+ this_cpu_inc(vm_event_states.event[item]);
}
static inline void __count_vm_events(enum vm_event_item item, long delta)
{
- __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+ __this_cpu_add(vm_event_states.event[item], delta);
}
static inline void count_vm_events(enum vm_event_item item, long delta)
{
- this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
+ this_cpu_add(vm_event_states.event[item], delta);
}
extern void all_vm_events(unsigned long *);
cur_ops->deferred_free(rp);
}
+ static int rcu_no_completed(void)
+ {
+ return 0;
+ }
+
static void rcu_torture_deferred_free(struct rcu_torture *p)
{
call_rcu(&p->rtort_rcu, rcu_torture_cb);
.name = "rcu_sync"
};
+ static struct rcu_torture_ops rcu_expedited_ops = {
+ .init = rcu_sync_torture_init,
+ .cleanup = NULL,
+ .readlock = rcu_torture_read_lock,
+ .read_delay = rcu_read_delay, /* just reuse rcu's version. */
+ .readunlock = rcu_torture_read_unlock,
+ .completed = rcu_no_completed,
+ .deferred_free = rcu_sync_torture_deferred_free,
+ .sync = synchronize_rcu_expedited,
+ .cb_barrier = NULL,
+ .stats = NULL,
+ .irq_capable = 1,
+ .name = "rcu_expedited"
+ };
+
/*
* Definitions for rcu_bh torture testing.
*/
.name = "srcu"
};
+ static void srcu_torture_synchronize_expedited(void)
+ {
+ synchronize_srcu_expedited(&srcu_ctl);
+ }
+
+ static struct rcu_torture_ops srcu_expedited_ops = {
+ .init = srcu_torture_init,
+ .cleanup = srcu_torture_cleanup,
+ .readlock = srcu_torture_read_lock,
+ .read_delay = srcu_read_delay,
+ .readunlock = srcu_torture_read_unlock,
+ .completed = srcu_torture_completed,
+ .deferred_free = rcu_sync_torture_deferred_free,
+ .sync = srcu_torture_synchronize_expedited,
+ .cb_barrier = NULL,
+ .stats = srcu_torture_stats,
+ .name = "srcu_expedited"
+ };
+
/*
* Definitions for sched torture testing.
*/
preempt_enable();
}
- static int sched_torture_completed(void)
- {
- return 0;
- }
-
static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
.name = "sched"
};
- static struct rcu_torture_ops sched_ops_sync = {
+ static struct rcu_torture_ops sched_sync_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
.name = "sched_sync"
};
- extern int rcu_expedited_torture_stats(char *page);
-
static struct rcu_torture_ops sched_expedited_ops = {
.init = rcu_sync_torture_init,
.cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
- .completed = sched_torture_completed,
+ .completed = rcu_no_completed,
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
old_rp = rcu_torture_current;
rp->rtort_mbtest = 1;
rcu_assign_pointer(rcu_torture_current, rp);
- smp_wmb();
+ smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
if (old_rp) {
i = old_rp->rtort_pipe_count;
if (i > RCU_TORTURE_PIPE_LEN)
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+ __this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
- __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+ __this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
cur_ops->readunlock(idx);
}
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
- __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
+ __this_cpu_inc(rcu_torture_count[pipe_count]);
completed = cur_ops->completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
- __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
+ __this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
cur_ops->readunlock(idx);
schedule();
int cpu;
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] =
- { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
- &sched_expedited_ops,
- &srcu_ops, &sched_ops, &sched_ops_sync, };
+ { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
+ &rcu_bh_ops, &rcu_bh_sync_ops,
+ &srcu_ops, &srcu_expedited_ops,
+ &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
mutex_lock(&fullstop_mutex);
break;
}
if (i == ARRAY_SIZE(torture_ops)) {
- printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
+ printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
torture_type);
+ printk(KERN_ALERT "rcu-torture types:");
+ for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
+ printk(KERN_ALERT " %s", torture_ops[i]->name);
+ printk(KERN_ALERT "\n");
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
* Copyright (C) 2004 William Lee Irwin III
*/
#include <linux/ring_buffer.h>
- #include <linux/utsrelease.h>
+ #include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
static inline void ftrace_disable_cpu(void)
{
preempt_disable();
- __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_inc(ftrace_cpu_disabled);
}
static inline void ftrace_enable_cpu(void)
{
- __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
+ __this_cpu_dec(ftrace_cpu_disabled);
preempt_enable();
}
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;
- static int __init set_ftrace(char *str)
+ static int __init set_cmdline_ftrace(char *str)
{
strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
default_bootup_tracer = bootup_tracer_buf;
ring_buffer_expanded = 1;
return 1;
}
- __setup("ftrace=", set_ftrace);
+ __setup("ftrace=", set_cmdline_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
"bin",
"block",
"stacktrace",
- "sched-tree",
"trace_printk",
"ftrace_preempt",
"branch",
* protected by per_cpu spinlocks. But the action of the swap
* needs its own lock.
*
- * This is defined as a raw_spinlock_t in order to help
+ * This is defined as a arch_spinlock_t in order to help
* with performance when lockdep debugging is enabled.
*
* It is also used in other places outside the update_max_tr
* so it needs to be defined outside of the
* CONFIG_TRACER_MAX_TRACE.
*/
- static raw_spinlock_t ftrace_max_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t ftrace_max_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
unsigned long __read_mostly tracing_max_latency;
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
/**
return;
WARN_ON_ONCE(!irqs_disabled());
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
__update_max_tr(tr, tsk, cpu);
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
- static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
*/
- if (!__raw_spin_trylock(&trace_cmdline_lock))
+ if (!arch_spin_trylock(&trace_cmdline_lock))
return;
idx = map_pid_to_cmdline[tsk->pid];
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
}
void trace_find_cmdline(int pid, char comm[])
}
preempt_disable();
- __raw_spin_lock(&trace_cmdline_lock);
+ arch_spin_lock(&trace_cmdline_lock);
map = map_pid_to_cmdline[pid];
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
- __raw_spin_unlock(&trace_cmdline_lock);
+ arch_spin_unlock(&trace_cmdline_lock);
preempt_enable();
}
struct ftrace_entry *entry;
/* If we are reading the ring buffer, don't trace */
- if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
__ftrace_trace_stack(tr->buffer, flags, skip, pc);
}
+ /**
+ * trace_dump_stack - record a stack back trace in the trace buffer
+ */
+ void trace_dump_stack(void)
+ {
+ unsigned long flags;
+
+ if (tracing_disabled || tracing_selftest_running)
+ return;
+
+ local_save_flags(flags);
+
+ /* skipping 3 traces, seems to get us at the caller of this function */
+ __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+ }
+
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static u32 trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_bprint;
/* Lockdep uses trace_printk for lock tracing */
local_irq_save(flags);
- __raw_spin_lock(&trace_buf_lock);
+ arch_spin_lock(&trace_buf_lock);
len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
if (len > TRACE_BUF_SIZE || len < 0)
ring_buffer_unlock_commit(buffer, event);
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
local_irq_restore(flags);
out:
int trace_array_vprintk(struct trace_array *tr,
unsigned long ip, const char *fmt, va_list args)
{
- static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static char trace_buf[TRACE_BUF_SIZE];
struct ftrace_event_call *call = &event_print;
pause_graph_tracing();
raw_local_irq_save(irq_flags);
- __raw_spin_lock(&trace_buf_lock);
+ arch_spin_lock(&trace_buf_lock);
len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
- len = min(len, TRACE_BUF_SIZE-1);
- trace_buf[len] = 0;
-
size = sizeof(*entry) + len + 1;
buffer = tr->buffer;
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
- entry->ip = ip;
+ entry->ip = ip;
memcpy(&entry->buf, trace_buf, len);
- entry->buf[len] = 0;
+ entry->buf[len] = '\0';
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
out_unlock:
- __raw_spin_unlock(&trace_buf_lock);
+ arch_spin_unlock(&trace_buf_lock);
raw_local_irq_restore(irq_flags);
unpause_graph_tracing();
out:
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
- return trace_array_printk(&global_trace, ip, fmt, args);
+ return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
int i = (int)*pos;
void *ent;
+ WARN_ON_ONCE(iter->leftover);
+
(*pos)++;
/* can't go backwards */
;
} else {
- l = *pos - 1;
- p = s_next(m, p, &l);
+ /*
+ * If we overflowed the seq_file before, then we want
+ * to just reuse the trace_seq buffer again.
+ */
+ if (iter->leftover)
+ p = iter;
+ else {
+ l = *pos - 1;
+ p = s_next(m, p, &l);
+ }
}
trace_event_read_lock();
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
+ int ret;
if (iter->ent == NULL) {
if (iter->tr) {
if (!(trace_flags & TRACE_ITER_VERBOSE))
print_func_help_header(m);
}
+ } else if (iter->leftover) {
+ /*
+ * If we filled the seq_file buffer earlier, we
+ * want to just show it now.
+ */
+ ret = trace_print_seq(m, &iter->seq);
+
+ /* ret should this time be zero, but you never know */
+ iter->leftover = ret;
+
} else {
print_trace_line(iter);
- trace_print_seq(m, &iter->seq);
+ ret = trace_print_seq(m, &iter->seq);
+ /*
+ * If we overflow the seq_file buffer, then it will
+ * ask us for this data again at start up.
+ * Use that instead.
+ * ret is 0 if seq_file write succeeded.
+ * -1 otherwise.
+ */
+ iter->leftover = ret;
}
return 0;
mutex_lock(&tracing_cpumask_update_lock);
local_irq_disable();
- __raw_spin_lock(&ftrace_max_lock);
+ arch_spin_lock(&ftrace_max_lock);
for_each_tracing_cpu(cpu) {
/*
* Increase/decrease the disabled counter if we are
atomic_dec(&global_trace.data[cpu]->disabled);
}
}
- __raw_spin_unlock(&ftrace_max_lock);
+ arch_spin_unlock(&ftrace_max_lock);
local_irq_enable();
cpumask_copy(tracing_cpumask, tracing_cpumask_new);
.write = tracing_cpumask_write,
};
- static ssize_t
- tracing_trace_options_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+ static int tracing_trace_options_show(struct seq_file *m, void *v)
{
struct tracer_opt *trace_opts;
u32 tracer_flags;
- int len = 0;
- char *buf;
- int r = 0;
int i;
-
- /* calculate max size */
- for (i = 0; trace_options[i]; i++) {
- len += strlen(trace_options[i]);
- len += 3; /* "no" and newline */
- }
-
mutex_lock(&trace_types_lock);
tracer_flags = current_trace->flags->val;
trace_opts = current_trace->flags->opts;
- /*
- * Increase the size with names of options specific
- * of the current tracer.
- */
- for (i = 0; trace_opts[i].name; i++) {
- len += strlen(trace_opts[i].name);
- len += 3; /* "no" and newline */
- }
-
- /* +1 for \0 */
- buf = kmalloc(len + 1, GFP_KERNEL);
- if (!buf) {
- mutex_unlock(&trace_types_lock);
- return -ENOMEM;
- }
-
for (i = 0; trace_options[i]; i++) {
if (trace_flags & (1 << i))
- r += sprintf(buf + r, "%s\n", trace_options[i]);
+ seq_printf(m, "%s\n", trace_options[i]);
else
- r += sprintf(buf + r, "no%s\n", trace_options[i]);
+ seq_printf(m, "no%s\n", trace_options[i]);
}
for (i = 0; trace_opts[i].name; i++) {
if (tracer_flags & trace_opts[i].bit)
- r += sprintf(buf + r, "%s\n",
- trace_opts[i].name);
+ seq_printf(m, "%s\n", trace_opts[i].name);
else
- r += sprintf(buf + r, "no%s\n",
- trace_opts[i].name);
+ seq_printf(m, "no%s\n", trace_opts[i].name);
}
mutex_unlock(&trace_types_lock);
- WARN_ON(r >= len + 1);
+ return 0;
+ }
- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ static int __set_tracer_option(struct tracer *trace,
+ struct tracer_flags *tracer_flags,
+ struct tracer_opt *opts, int neg)
+ {
+ int ret;
- kfree(buf);
- return r;
+ ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+ if (ret)
+ return ret;
+
+ if (neg)
+ tracer_flags->val &= ~opts->bit;
+ else
+ tracer_flags->val |= opts->bit;
+ return 0;
}
/* Try to assign a tracer specific option */
{
struct tracer_flags *tracer_flags = trace->flags;
struct tracer_opt *opts = NULL;
- int ret = 0, i = 0;
- int len;
+ int i;
for (i = 0; tracer_flags->opts[i].name; i++) {
opts = &tracer_flags->opts[i];
- len = strlen(opts->name);
- if (strncmp(cmp, opts->name, len) == 0) {
- ret = trace->set_flag(tracer_flags->val,
- opts->bit, !neg);
- break;
- }
+ if (strcmp(cmp, opts->name) == 0)
+ return __set_tracer_option(trace, trace->flags,
+ opts, neg);
}
- /* Not found */
- if (!tracer_flags->opts[i].name)
- return -EINVAL;
-
- /* Refused to handle */
- if (ret)
- return ret;
-
- if (neg)
- tracer_flags->val &= ~opts->bit;
- else
- tracer_flags->val |= opts->bit;
- return 0;
+ return -EINVAL;
}
static void set_tracer_flags(unsigned int mask, int enabled)
size_t cnt, loff_t *ppos)
{
char buf[64];
- char *cmp = buf;
+ char *cmp;
int neg = 0;
int ret;
int i;
return -EFAULT;
buf[cnt] = 0;
+ cmp = strstrip(buf);
- if (strncmp(buf, "no", 2) == 0) {
+ if (strncmp(cmp, "no", 2) == 0) {
neg = 1;
cmp += 2;
}
for (i = 0; trace_options[i]; i++) {
- int len = strlen(trace_options[i]);
-
- if (strncmp(cmp, trace_options[i], len) == 0) {
+ if (strcmp(cmp, trace_options[i]) == 0) {
set_tracer_flags(1 << i, !neg);
break;
}
return ret;
}
- filp->f_pos += cnt;
+ *ppos += cnt;
return cnt;
}
+ static int tracing_trace_options_open(struct inode *inode, struct file *file)
+ {
+ if (tracing_disabled)
+ return -ENODEV;
+ return single_open(file, tracing_trace_options_show, NULL);
+ }
+
static const struct file_operations tracing_iter_fops = {
- .open = tracing_open_generic,
- .read = tracing_trace_options_read,
+ .open = tracing_trace_options_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
.write = tracing_trace_options_write,
};
}
mutex_unlock(&trace_types_lock);
- filp->f_pos += cnt;
+ *ppos += cnt;
return cnt;
}
if (err)
return err;
- filp->f_pos += ret;
+ *ppos += ret;
return ret;
}
else
cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
+
+ if (iter->trace->pipe_close)
+ iter->trace->pipe_close(iter);
+
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
__free_page(spd->pages[idx]);
}
- static struct pipe_buf_operations tracing_pipe_buf_ops = {
+ static const struct pipe_buf_operations tracing_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
}
}
- filp->f_pos += cnt;
+ *ppos += cnt;
/* If check pages failed, return ENOMEM */
if (tracing_disabled)
size_t cnt, loff_t *fpos)
{
char *buf;
- char *end;
if (tracing_disabled)
return -EINVAL;
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
- buf = kmalloc(cnt + 1, GFP_KERNEL);
+ buf = kmalloc(cnt + 2, GFP_KERNEL);
if (buf == NULL)
return -ENOMEM;
kfree(buf);
return -EFAULT;
}
+ if (buf[cnt-1] != '\n') {
+ buf[cnt] = '\n';
+ buf[cnt+1] = '\0';
+ } else
+ buf[cnt] = '\0';
- /* Cut from the first nil or newline. */
- buf[cnt] = '\0';
- end = strchr(buf, '\n');
- if (end)
- *end = '\0';
-
- cnt = mark_printk("%s\n", buf);
+ cnt = mark_printk("%s", buf);
kfree(buf);
*fpos += cnt;
return cnt;
}
- static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+ static int tracing_clock_show(struct seq_file *m, void *v)
{
- char buf[64];
- int bufiter = 0;
int i;
for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
- bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+ seq_printf(m,
"%s%s%s%s", i ? " " : "",
i == trace_clock_id ? "[" : "", trace_clocks[i].name,
i == trace_clock_id ? "]" : "");
- bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+ seq_putc(m, '\n');
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+ return 0;
}
static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
return cnt;
}
+ static int tracing_clock_open(struct inode *inode, struct file *file)
+ {
+ if (tracing_disabled)
+ return -ENODEV;
+ return single_open(file, tracing_clock_show, NULL);
+ }
+
static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
};
static const struct file_operations trace_clock_fops = {
- .open = tracing_open_generic,
- .read = tracing_clock_read,
+ .open = tracing_clock_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
.write = tracing_clock_write,
};
}
/* Pipe buffer operations for a buffer. */
- static struct pipe_buf_operations buffer_pipe_buf_ops = {
+ static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.can_merge = 0,
.map = generic_pipe_buf_map,
.unmap = generic_pipe_buf_unmap,
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
- return ENOMEM;
+ return -ENOMEM;
trace_seq_init(s);
if (ret < 0)
return ret;
- ret = 0;
- switch (val) {
- case 0:
- /* do nothing if already cleared */
- if (!(topt->flags->val & topt->opt->bit))
- break;
-
- mutex_lock(&trace_types_lock);
- if (current_trace->set_flag)
- ret = current_trace->set_flag(topt->flags->val,
- topt->opt->bit, 0);
- mutex_unlock(&trace_types_lock);
- if (ret)
- return ret;
- topt->flags->val &= ~topt->opt->bit;
- break;
- case 1:
- /* do nothing if already set */
- if (topt->flags->val & topt->opt->bit)
- break;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ if (!!(topt->flags->val & topt->opt->bit) != val) {
mutex_lock(&trace_types_lock);
- if (current_trace->set_flag)
- ret = current_trace->set_flag(topt->flags->val,
- topt->opt->bit, 1);
+ ret = __set_tracer_option(current_trace, topt->flags,
+ topt->opt, !val);
mutex_unlock(&trace_types_lock);
if (ret)
return ret;
- topt->flags->val |= topt->opt->bit;
- break;
-
- default:
- return -EINVAL;
}
*ppos += cnt;
static void __ftrace_dump(bool disable_tracing)
{
- static raw_spinlock_t ftrace_dump_lock =
- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t ftrace_dump_lock =
+ (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
unsigned int old_userobj;
/* only one dump */
local_irq_save(flags);
- __raw_spin_lock(&ftrace_dump_lock);
+ arch_spin_lock(&ftrace_dump_lock);
if (dump_ran)
goto out;
}
out:
- __raw_spin_unlock(&ftrace_dump_lock);
+ arch_spin_unlock(&ftrace_dump_lock);
local_irq_restore(flags);
}
#include "trace.h"
#include "trace_output.h"
- struct fgraph_data {
+ struct fgraph_cpu_data {
pid_t last_pid;
int depth;
+ int ignore;
+ };
+
+ struct fgraph_data {
+ struct fgraph_cpu_data *cpu_data;
+
+ /* Place to preserve last processed entry. */
+ struct ftrace_graph_ent_entry ent;
+ struct ftrace_graph_ret_entry ret;
+ int failed;
+ int cpu;
};
#define TRACE_GRAPH_INDENT 2
struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ent_entry *entry;
- if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
return 0;
event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
struct ring_buffer *buffer = tr->buffer;
struct ftrace_graph_ret_entry *entry;
- if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
+ if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
return;
event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
if (!data)
return TRACE_TYPE_HANDLED;
- last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+ last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
if (*last_pid == pid)
return TRACE_TYPE_HANDLED;
get_return_for_leaf(struct trace_iterator *iter,
struct ftrace_graph_ent_entry *curr)
{
- struct ring_buffer_iter *ring_iter;
+ struct fgraph_data *data = iter->private;
+ struct ring_buffer_iter *ring_iter = NULL;
struct ring_buffer_event *event;
struct ftrace_graph_ret_entry *next;
- ring_iter = iter->buffer_iter[iter->cpu];
+ /*
+ * If the previous output failed to write to the seq buffer,
+ * then we just reuse the data from before.
+ */
+ if (data && data->failed) {
+ curr = &data->ent;
+ next = &data->ret;
+ } else {
- /* First peek to compare current entry and the next one */
- if (ring_iter)
- event = ring_buffer_iter_peek(ring_iter, NULL);
- else {
- /* We need to consume the current entry to see the next one */
- ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
- event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
- NULL);
- }
+ ring_iter = iter->buffer_iter[iter->cpu];
+
+ /* First peek to compare current entry and the next one */
+ if (ring_iter)
+ event = ring_buffer_iter_peek(ring_iter, NULL);
+ else {
+ /*
+ * We need to consume the current entry to see
+ * the next one.
+ */
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+ event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+ NULL);
+ }
- if (!event)
- return NULL;
+ if (!event)
+ return NULL;
+
+ next = ring_buffer_event_data(event);
- next = ring_buffer_event_data(event);
+ if (data) {
+ /*
+ * Save current and next entries for later reference
+ * if the output fails.
+ */
+ data->ent = *curr;
+ data->ret = *next;
+ }
+ }
if (next->ent.type != TRACE_GRAPH_RET)
return NULL;
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
/*
* Comments display at + 1 to depth. Since
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
*depth = call->depth;
}
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
struct trace_iterator *iter)
{
- int cpu = iter->cpu;
+ struct fgraph_data *data = iter->private;
struct ftrace_graph_ent *call = &field->graph_ent;
struct ftrace_graph_ret_entry *leaf_ret;
+ static enum print_line_t ret;
+ int cpu = iter->cpu;
if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
return TRACE_TYPE_PARTIAL_LINE;
leaf_ret = get_return_for_leaf(iter, field);
if (leaf_ret)
- return print_graph_entry_leaf(iter, field, leaf_ret, s);
+ ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
else
- return print_graph_entry_nested(iter, field, s, cpu);
+ ret = print_graph_entry_nested(iter, field, s, cpu);
+ if (data) {
+ /*
+ * If we failed to write our output, then we need to make
+ * note of it. Because we already consumed our entry.
+ */
+ if (s->full) {
+ data->failed = 1;
+ data->cpu = cpu;
+ } else
+ data->failed = 0;
+ }
+
+ return ret;
}
static enum print_line_t
if (data) {
int cpu = iter->cpu;
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
/*
* Comments display at + 1 to depth. This is the
int i;
if (data)
- depth = per_cpu_ptr(data, iter->cpu)->depth;
+ depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
if (print_graph_prologue(iter, s, 0, 0))
return TRACE_TYPE_PARTIAL_LINE;
enum print_line_t
print_graph_function(struct trace_iterator *iter)
{
+ struct ftrace_graph_ent_entry *field;
+ struct fgraph_data *data = iter->private;
struct trace_entry *entry = iter->ent;
struct trace_seq *s = &iter->seq;
+ int cpu = iter->cpu;
+ int ret;
+
+ if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+ per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+ return TRACE_TYPE_HANDLED;
+ }
+
+ /*
+ * If the last output failed, there's a possibility we need
+ * to print out the missing entry which would never go out.
+ */
+ if (data && data->failed) {
+ field = &data->ent;
+ iter->cpu = data->cpu;
+ ret = print_graph_entry(field, s, iter);
+ if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+ per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+ ret = TRACE_TYPE_NO_CONSUME;
+ }
+ iter->cpu = cpu;
+ return ret;
+ }
switch (entry->type) {
case TRACE_GRAPH_ENT: {
* sizeof(struct ftrace_graph_ent_entry) is very small,
* it can be safely saved at the stack.
*/
- struct ftrace_graph_ent_entry *field, saved;
+ struct ftrace_graph_ent_entry saved;
trace_assign_type(field, entry);
saved = *field;
return print_graph_entry(&saved, s, iter);
static void graph_trace_open(struct trace_iterator *iter)
{
/* pid and depth on the last trace processed */
- struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+ struct fgraph_data *data;
int cpu;
+ iter->private = NULL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- pr_warning("function graph tracer: not enough memory\n");
- else
- for_each_possible_cpu(cpu) {
- pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
- int *depth = &(per_cpu_ptr(data, cpu)->depth);
- *pid = -1;
- *depth = 0;
- }
+ goto out_err;
+
+ data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+ if (!data->cpu_data)
+ goto out_err_free;
+
+ for_each_possible_cpu(cpu) {
+ pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+ int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+ int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+ *pid = -1;
+ *depth = 0;
+ *ignore = 0;
+ }
iter->private = data;
+
+ return;
+
+ out_err_free:
+ kfree(data);
+ out_err:
+ pr_warning("function graph tracer: not enough memory\n");
}
static void graph_trace_close(struct trace_iterator *iter)
{
- free_percpu(iter->private);
+ struct fgraph_data *data = iter->private;
+
+ if (data) {
+ free_percpu(data->cpu_data);
+ kfree(data);
+ }
}
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
.open = graph_trace_open,
+ .pipe_open = graph_trace_open,
.close = graph_trace_close,
+ .pipe_close = graph_trace_close,
.wait_pipe = poll_wait_pipe,
.init = graph_trace_init,
.reset = graph_trace_reset,
#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
+ #include <asm/io.h>
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
*
* During allocation, pcpu_alloc_mutex is kept locked all the time and
* pcpu_lock is grabbed and released as necessary. All actual memory
- * allocations are done using GFP_KERNEL with pcpu_lock released.
+ * allocations are done using GFP_KERNEL with pcpu_lock released. In
+ * general, percpu memory can't be allocated with irq off but
+ * irqsave/restore are still used in alloc path so that it can be used
+ * from early init path - sched_init() specifically.
*
* Free path accesses and alters only the index data structures, so it
* can be safely called from atomic context. When memory needs to be
}
/**
- * pcpu_extend_area_map - extend area map for allocation
- * @chunk: target chunk
+ * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+ * @chunk: chunk of interest
*
- * Extend area map of @chunk so that it can accomodate an allocation.
- * A single allocation can split an area into three areas, so this
- * function makes sure that @chunk->map has at least two extra slots.
+ * Determine whether area map of @chunk needs to be extended to
+ * accomodate a new allocation.
*
* CONTEXT:
- * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired
- * if area map is extended.
+ * pcpu_lock.
*
* RETURNS:
- * 0 if noop, 1 if successfully extended, -errno on failure.
+ * New target map allocation length if extension is necessary, 0
+ * otherwise.
*/
- static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
- __releases(lock) __acquires(lock)
+ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
{
int new_alloc;
- int *new;
- size_t size;
- /* has enough? */
if (chunk->map_alloc >= chunk->map_used + 2)
return 0;
- spin_unlock_irq(&pcpu_lock);
-
new_alloc = PCPU_DFL_MAP_ALLOC;
while (new_alloc < chunk->map_used + 2)
new_alloc *= 2;
- new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
- if (!new) {
- spin_lock_irq(&pcpu_lock);
+ return new_alloc;
+ }
+
+ /**
+ * pcpu_extend_area_map - extend area map of a chunk
+ * @chunk: chunk of interest
+ * @new_alloc: new target allocation length of the area map
+ *
+ * Extend area map of @chunk to have @new_alloc entries.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+ {
+ int *old = NULL, *new = NULL;
+ size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+ unsigned long flags;
+
+ new = pcpu_mem_alloc(new_size);
+ if (!new)
return -ENOMEM;
- }
- /*
- * Acquire pcpu_lock and switch to new area map. Only free
- * could have happened inbetween, so map_used couldn't have
- * grown.
- */
- spin_lock_irq(&pcpu_lock);
- BUG_ON(new_alloc < chunk->map_used + 2);
+ /* acquire pcpu_lock and switch to new area map */
+ spin_lock_irqsave(&pcpu_lock, flags);
+
+ if (new_alloc <= chunk->map_alloc)
+ goto out_unlock;
- size = chunk->map_alloc * sizeof(chunk->map[0]);
- memcpy(new, chunk->map, size);
+ old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+ memcpy(new, chunk->map, old_size);
/*
* map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
* one of the first chunks and still using static map.
*/
if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
- pcpu_mem_free(chunk->map, size);
+ old = chunk->map;
chunk->map_alloc = new_alloc;
chunk->map = new;
+ new = NULL;
+
+ out_unlock:
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+
+ /*
+ * pcpu_mem_free() might end up calling vfree() which uses
+ * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+ */
+ pcpu_mem_free(old, old_size);
+ pcpu_mem_free(new, new_size);
+
return 0;
}
int rs, re;
/* quick path, check whether it's empty already */
- pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
- if (rs == page_start && re == page_end)
- return;
- break;
- }
+ rs = page_start;
+ pcpu_next_unpop(chunk, &rs, &re, page_end);
+ if (rs == page_start && re == page_end)
+ return;
/* immutable chunks can't be depopulated */
WARN_ON(chunk->immutable);
int rs, re, rc;
/* quick path, check whether all pages are already there */
- pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
- if (rs == page_start && re == page_end)
- goto clear;
- break;
- }
+ rs = page_start;
+ pcpu_next_pop(chunk, &rs, &re, page_end);
+ if (rs == page_start && re == page_end)
+ goto clear;
/* need to allocate and map pages, this chunk can't be immutable */
WARN_ON(chunk->immutable);
static int warn_limit = 10;
struct pcpu_chunk *chunk;
const char *err;
- int slot, off;
+ int slot, off, new_alloc;
+ unsigned long flags;
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
WARN(true, "illegal size (%zu) or align (%zu) for "
}
mutex_lock(&pcpu_alloc_mutex);
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
/* serve reserved allocations from the reserved chunk if available */
if (reserved && pcpu_reserved_chunk) {
chunk = pcpu_reserved_chunk;
- if (size > chunk->contig_hint ||
- pcpu_extend_area_map(chunk) < 0) {
- err = "failed to extend area map of reserved chunk";
+
+ if (size > chunk->contig_hint) {
+ err = "alloc from reserved chunk failed";
goto fail_unlock;
}
+
+ while ((new_alloc = pcpu_need_to_extend(chunk))) {
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+ if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+ err = "failed to extend area map of reserved chunk";
+ goto fail_unlock_mutex;
+ }
+ spin_lock_irqsave(&pcpu_lock, flags);
+ }
+
off = pcpu_alloc_area(chunk, size, align);
if (off >= 0)
goto area_found;
+
err = "alloc from reserved chunk failed";
goto fail_unlock;
}
if (size > chunk->contig_hint)
continue;
- switch (pcpu_extend_area_map(chunk)) {
- case 0:
- break;
- case 1:
- goto restart; /* pcpu_lock dropped, restart */
- default:
- err = "failed to extend area map";
- goto fail_unlock;
+ new_alloc = pcpu_need_to_extend(chunk);
+ if (new_alloc) {
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+ if (pcpu_extend_area_map(chunk,
+ new_alloc) < 0) {
+ err = "failed to extend area map";
+ goto fail_unlock_mutex;
+ }
+ spin_lock_irqsave(&pcpu_lock, flags);
+ /*
+ * pcpu_lock has been dropped, need to
+ * restart cpu_slot list walking.
+ */
+ goto restart;
}
off = pcpu_alloc_area(chunk, size, align);
}
/* hmmm... no space left, create a new chunk */
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
chunk = alloc_pcpu_chunk();
if (!chunk) {
goto fail_unlock_mutex;
}
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_chunk_relocate(chunk, -1);
goto restart;
area_found:
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
/* populate, map and clear the area */
if (pcpu_populate_chunk(chunk, off, size)) {
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_free_area(chunk, off);
err = "failed to populate";
goto fail_unlock;
return __addr_to_pcpu_ptr(chunk->base_addr + off);
fail_unlock:
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
fail_unlock_mutex:
mutex_unlock(&pcpu_alloc_mutex);
if (warn_limit) {
}
EXPORT_SYMBOL_GPL(free_percpu);
+ /**
+ * per_cpu_ptr_to_phys - convert translated percpu address to physical address
+ * @addr: the address to be converted to physical address
+ *
+ * Given @addr which is dereferenceable address obtained via one of
+ * percpu access macros, this function translates it into its physical
+ * address. The caller is responsible for ensuring @addr stays valid
+ * until this function finishes.
+ *
+ * RETURNS:
+ * The physical address for @addr.
+ */
+ phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+ if ((unsigned long)addr < VMALLOC_START ||
+ (unsigned long)addr >= VMALLOC_END)
+ return __pa(addr);
+ else
+ return page_to_phys(vmalloc_to_page(addr));
+ }
+
static inline size_t pcpu_calc_fc_sizes(size_t static_size,
size_t reserved_size,
ssize_t *dyn_sizep)