Merge branch 'master' into percpu
author Tejun Heo <tj@kernel.org>
Tue, 5 Jan 2010 00:17:33 +0000 (09:17 +0900)
committer Tejun Heo <tj@kernel.org>
Tue, 5 Jan 2010 00:17:33 +0000 (09:17 +0900)
Conflicts:
arch/powerpc/platforms/pseries/hvCall.S
include/linux/percpu.h

12 files changed:
arch/blackfin/mach-common/entry.S
arch/x86/include/asm/system.h
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/head_32.S
arch/x86/kernel/vmlinux.lds.S
include/linux/compiler.h
include/linux/percpu.h
include/linux/vmstat.h
kernel/rcutorture.c
kernel/trace/trace.c
kernel/trace/trace_functions_graph.c
mm/percpu.c

diff --combined arch/blackfin/mach-common/entry.S
index a3ea7e9fe43b1644fbe043c5722a1bbf823ffd58,b0ed0b487ff24dbd94b66565f8843afda8feea71..01b2f58dfb95f9e83d8f5cbf8067fb68358e6c9f
@@@ -1,32 -1,11 +1,11 @@@
  /*
-  * File:         arch/blackfin/mach-common/entry.S
-  * Based on:
-  * Author:       Linus Torvalds
+  * Contains the system-call and fault low-level handling routines.
+  * This also contains the timer-interrupt handler, as well as all
+  * interrupts and faults that can result in a task-switch.
   *
-  * Created:      ?
-  * Description:  contains the system-call and fault low-level handling routines.
-  *               This also contains the timer-interrupt handler, as well as all
-  *               interrupts and faults that can result in a task-switch.
+  * Copyright 2005-2009 Analog Devices Inc.
   *
-  * Modified:
-  *               Copyright 2004-2006 Analog Devices Inc.
-  *
-  * Bugs:         Enter bugs at http://blackfin.uclinux.org/
-  *
-  * This program is free software; you can redistribute it and/or modify
-  * it under the terms of the GNU General Public License as published by
-  * the Free Software Foundation; either version 2 of the License, or
-  * (at your option) any later version.
-  *
-  * This program is distributed in the hope that it will be useful,
-  * but WITHOUT ANY WARRANTY; without even the implied warranty of
-  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  * GNU General Public License for more details.
-  *
-  * You should have received a copy of the GNU General Public License
-  * along with this program; if not, see the file COPYING, or write
-  * to the Free Software Foundation, Inc.,
-  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+  * Licensed under the GPL-2 or later.
   */
  
  /* NOTE: This code handles signal-recognition, which happens every time
@@@ -734,6 -713,8 +713,8 @@@ ENTRY(_system_call
        cc = BITTST(r7, TIF_RESTORE_SIGMASK);
        if cc jump .Lsyscall_do_signals;
        cc = BITTST(r7, TIF_SIGPENDING);
+       if cc jump .Lsyscall_do_signals;
+       cc = BITTST(r7, TIF_NOTIFY_RESUME);
        if !cc jump .Lsyscall_really_exit;
  .Lsyscall_do_signals:
        /* Reenable interrupts.  */
  
        r0 = sp;
        SP += -12;
-       call _do_signal;
+       call _do_notify_resume;
        SP += 12;
  
  .Lsyscall_really_exit:
@@@ -835,8 -816,8 +816,8 @@@ ENDPROC(_resume
  
  ENTRY(_ret_from_exception)
  #ifdef CONFIG_IPIPE
 -      p2.l = _per_cpu__ipipe_percpu_domain;
 -      p2.h = _per_cpu__ipipe_percpu_domain;
 +      p2.l = _ipipe_percpu_domain;
 +      p2.h = _ipipe_percpu_domain;
        r0.l = _ipipe_root;
        r0.h = _ipipe_root;
        r2 = [p2];
@@@ -1443,7 -1424,7 +1424,7 @@@ ENTRY(_sys_call_table
        .long _sys_ni_syscall   /* streams2 */
        .long _sys_vfork                /* 190 */
        .long _sys_getrlimit
-       .long _sys_mmap2
+       .long _sys_mmap_pgoff
        .long _sys_truncate64
        .long _sys_ftruncate64
        .long _sys_stat64       /* 195 */
        .long _sys_pwritev
        .long _sys_rt_tgsigqueueinfo
        .long _sys_perf_event_open
+       .long _sys_recvmmsg             /* 370 */
  
        .rept NR_syscalls-(.-_sys_call_table)/4
        .long _sys_ni_syscall
diff --combined arch/x86/include/asm/system.h
index de10c19d95586b6b59251ad959d6e8990dd65d9c,ecb544e65382893970f2090dd3bb341d03583f4a..e529f26c3292762193e6281f832fc90e01daa1b2
@@@ -23,6 -23,7 +23,7 @@@ struct task_struct *__switch_to(struct 
  struct tss_struct;
  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss);
+ extern void show_regs_common(void);
  
  #ifdef CONFIG_X86_32
  
@@@ -31,7 -32,7 +32,7 @@@
        "movl %P[task_canary](%[next]), %%ebx\n\t"                      \
        "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
  #define __switch_canary_oparam                                                \
 -      , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
 +      , [stack_canary] "=m" (stack_canary.canary)
  #define __switch_canary_iparam                                                \
        , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
  #else /* CC_STACKPROTECTOR */
@@@ -113,7 -114,7 +114,7 @@@ do {                                                                       
        "movq %P[task_canary](%%rsi),%%r8\n\t"                            \
        "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
  #define __switch_canary_oparam                                                  \
 -      , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
 +      , [gs_canary] "=m" (irq_stack_union.stack_canary)
  #define __switch_canary_iparam                                                  \
        , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
  #else /* CC_STACKPROTECTOR */
             "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
             "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */    \
             "call __switch_to\n\t"                                       \
-            ".globl thread_return\n"                                     \
-            "thread_return:\n\t"                                         \
             "movq "__percpu_arg([current_task])",%%rsi\n\t"              \
             __switch_canary                                              \
             "movq %P[thread_info](%%rsi),%%r8\n\t"                       \
             "movq %%rax,%%rdi\n\t"                                       \
 -           "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"         \
 +           "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"                 \
             "jnz   ret_from_fork\n\t"                                    \
             RESTORE_CONTEXT                                              \
             : "=a" (last)                                                \
               [ti_flags] "i" (offsetof(struct thread_info, flags)),      \
               [_tif_fork] "i" (_TIF_FORK),                               \
               [thread_info] "i" (offsetof(struct task_struct, stack)),   \
 -             [current_task] "m" (per_cpu_var(current_task))             \
 +             [current_task] "m" (current_task)                          \
               __switch_canary_iparam                                     \
             : "memory", "cc" __EXTRA_CLOBBER)
  #endif
@@@ -157,19 -156,22 +156,22 @@@ extern void native_load_gs_index(unsign
   * Load a segment. Fall back on loading the zero
   * segment if something goes wrong..
   */
- #define loadsegment(seg, value)                       \
-       asm volatile("\n"                       \
-                    "1:\t"                     \
-                    "movl %k0,%%" #seg "\n"    \
-                    "2:\n"                     \
-                    ".section .fixup,\"ax\"\n" \
-                    "3:\t"                     \
-                    "movl %k1, %%" #seg "\n\t" \
-                    "jmp 2b\n"                 \
-                    ".previous\n"              \
-                    _ASM_EXTABLE(1b,3b)        \
-                    : :"r" (value), "r" (0) : "memory")
+ #define loadsegment(seg, value)                                               \
+ do {                                                                  \
+       unsigned short __val = (value);                                 \
+                                                                       \
+       asm volatile("                                          \n"     \
+                    "1:        movl %k0,%%" #seg "             \n"     \
+                                                                       \
+                    ".section .fixup,\"ax\"                    \n"     \
+                    "2:        xorl %k0,%k0                    \n"     \
+                    "          jmp 1b                          \n"     \
+                    ".previous                                 \n"     \
+                                                                       \
+                    _ASM_EXTABLE(1b, 2b)                               \
+                                                                       \
+                    : "+r" (__val) : : "memory");                      \
+ } while (0)
  
  /*
   * Save a segment register away
diff --combined arch/x86/kernel/apic/nmi.c
index 45404379d173637133731004540b8511495bee17,0159a69396cba449a424190459a02d83a3f417d8..4ada42c3dabb97aec6dd0dd27b5405258d31397a
@@@ -39,7 -39,8 +39,8 @@@
  int unknown_nmi_panic;
  int nmi_watchdog_enabled;
  
- static cpumask_t backtrace_mask __read_mostly;
+ /* For reliability, we're prepared to waste bits here. */
+ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
  
  /* nmi_active:
   * >0: the lapic NMI watchdog is active, but can be disabled
@@@ -414,7 -415,7 +415,7 @@@ nmi_watchdog_tick(struct pt_regs *regs
        }
  
        /* We can be called before check_nmi_watchdog, hence NULL check. */
-       if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
  
                spin_lock(&lock);
                show_regs(regs);
                dump_stack();
                spin_unlock(&lock);
-               cpumask_clear_cpu(cpu, &backtrace_mask);
+               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
  
                rc = 1;
        }
                 * Ayiee, looks like this CPU is stuck ...
                 * wait a few IRQs (5 seconds) before doing the oops ...
                 */
 -              __this_cpu_inc(per_cpu_var(alert_counter));
 -              if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz)
 +              __this_cpu_inc(alert_counter);
 +              if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
                        /*
                         * die_nmi will return ONLY if NOTIFY_STOP happens..
                         */
                                regs, panic_on_timeout);
        } else {
                __get_cpu_var(last_irq_sum) = sum;
 -              __this_cpu_write(per_cpu_var(alert_counter), 0);
 +              __this_cpu_write(alert_counter, 0);
        }
  
        /* see if the nmi watchdog went off */
@@@ -558,14 -559,14 +559,14 @@@ void arch_trigger_all_cpu_backtrace(voi
  {
        int i;
  
-       cpumask_copy(&backtrace_mask, cpu_online_mask);
+       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
  
        printk(KERN_INFO "sending NMI to all CPUs:\n");
        apic->send_IPI_all(NMI_VECTOR);
  
        /* Wait for up to 10 seconds for all CPUs to do the backtrace */
        for (i = 0; i < 10 * 1000; i++) {
-               if (cpumask_empty(&backtrace_mask))
+               if (cpumask_empty(to_cpumask(backtrace_mask)))
                        break;
                mdelay(1);
        }
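
The nmi.c hunks above replace a full cpumask_t with a NR_CPUS-sized DECLARE_BITMAP() that is converted on use with to_cpumask(). A minimal sketch of the same pattern follows; the mask name and helper functions are hypothetical, not part of the patch:

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/cpumask.h>

/*
 * Static storage sized for the worst case; to_cpumask() yields a
 * struct cpumask * view of the underlying bitmap (hypothetical name).
 */
static DECLARE_BITMAP(pending_mask, NR_CPUS) __read_mostly;

static void mark_all_online_pending(void)
{
	cpumask_copy(to_cpumask(pending_mask), cpu_online_mask);
}

static bool test_and_clear_pending(int cpu)
{
	if (!cpumask_test_cpu(cpu, to_cpumask(pending_mask)))
		return false;
	cpumask_clear_cpu(cpu, to_cpumask(pending_mask));
	return true;
}
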
diff --combined arch/x86/kernel/head_32.S
index fd39eaf83b8487606166fb7425378c5afdc06714,7fd318bac59ce54294ca3e33cef0347c39240f6c..37c3d4b17d859d6ee38029a83f2abcaee6d4dc05
@@@ -18,6 -18,8 +18,8 @@@
  #include <asm/asm-offsets.h>
  #include <asm/setup.h>
  #include <asm/processor-flags.h>
+ #include <asm/msr-index.h>
+ #include <asm/cpufeature.h>
  #include <asm/percpu.h>
  
  /* Physical address */
@@@ -297,25 -299,27 +299,27 @@@ ENTRY(startup_32_smp
        orl %edx,%eax
        movl %eax,%cr4
  
-       btl $5, %eax            # check if PAE is enabled
-       jnc 6f
+       testb $X86_CR4_PAE, %al         # check if PAE is enabled
+       jz 6f
  
        /* Check if extended functions are implemented */
        movl $0x80000000, %eax
        cpuid
-       cmpl $0x80000000, %eax
-       jbe 6f
+       /* Value must be in the range 0x80000001 to 0x8000ffff */
+       subl $0x80000001, %eax
+       cmpl $(0x8000ffff-0x80000001), %eax
+       ja 6f
        mov $0x80000001, %eax
        cpuid
        /* Execute Disable bit supported? */
-       btl $20, %edx
+       btl $(X86_FEATURE_NX & 31), %edx
        jnc 6f
  
        /* Setup EFER (Extended Feature Enable Register) */
-       movl $0xc0000080, %ecx
+       movl $MSR_EFER, %ecx
        rdmsr
  
-       btsl $11, %eax
+       btsl $_EFER_NX, %eax
        /* Make changes effective */
        wrmsr
  
@@@ -438,8 -442,8 +442,8 @@@ is386:     movl $2,%ecx            # set M
         */
        cmpb $0,ready
        jne 1f
 -      movl $per_cpu__gdt_page,%eax
 -      movl $per_cpu__stack_canary,%ecx
 +      movl $gdt_page,%eax
 +      movl $stack_canary,%ecx
        movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
        shrl $16, %ecx
        movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@@ -702,7 -706,7 +706,7 @@@ idt_descr
        .word 0                         # 32 bit align gdt_desc.address
  ENTRY(early_gdt_descr)
        .word GDT_ENTRIES*8-1
 -      .long per_cpu__gdt_page         /* Overwritten for secondary CPUs */
 +      .long gdt_page                  /* Overwritten for secondary CPUs */
  
  /*
   * The boot_gdt must mirror the equivalent in setup.S and is
diff --combined arch/x86/kernel/vmlinux.lds.S
index ecb92717c41264aab23e184463d63fb75db539ae,f92a0da608cb3ade16374118320e4d73220e4b95..44879df55696407556d711b69b0f83fdc311f7f0
@@@ -41,6 -41,32 +41,32 @@@ ENTRY(phys_startup_64
  jiffies_64 = jiffies;
  #endif
  
+ #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+ /*
+  * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
+  * we retain large page mappings for boundaries spanning kernel text, rodata
+  * and data sections.
+  *
+  * However, kernel identity mappings will have different RWX permissions
+  * to the pages mapping to text and to the pages padding (which are freed) the
+  * text section. Hence kernel identity mappings will be broken to smaller
+  * pages. For 64-bit, kernel text and kernel identity mappings are different,
+  * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
+  * as well as retain 2MB large page mappings for kernel text.
+  */
+ #define X64_ALIGN_DEBUG_RODATA_BEGIN  . = ALIGN(HPAGE_SIZE);
+ #define X64_ALIGN_DEBUG_RODATA_END                            \
+               . = ALIGN(HPAGE_SIZE);                          \
+               __end_rodata_hpage_align = .;
+ #else
+ #define X64_ALIGN_DEBUG_RODATA_BEGIN
+ #define X64_ALIGN_DEBUG_RODATA_END
+ #endif
  PHDRS {
        text PT_LOAD FLAGS(5);          /* R_E */
        data PT_LOAD FLAGS(7);          /* RWE */
@@@ -90,7 -116,9 +116,9 @@@ SECTION
  
        EXCEPTION_TABLE(16) :text = 0x9090
  
+       X64_ALIGN_DEBUG_RODATA_BEGIN
        RO_DATA(PAGE_SIZE)
+       X64_ALIGN_DEBUG_RODATA_END
  
        /* Data */
        .data : AT(ADDR(.data) - LOAD_OFFSET) {
  
                PAGE_ALIGNED_DATA(PAGE_SIZE)
  
-               CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
+               CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
  
                DATA_DATA
                CONSTRUCTORS
  
                /* rarely changed data like cpu maps */
-               READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
+               READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
  
                /* End of data section */
                _edata = .;
                *(.vsyscall_0)
        } :user
  
-       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       . = ALIGN(L1_CACHE_BYTES);
        .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
                *(.vsyscall_fn)
        }
  
-       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       . = ALIGN(L1_CACHE_BYTES);
        .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
                *(.vsyscall_gtod_data)
        }
        }
        vgetcpu_mode = VVIRT(.vgetcpu_mode);
  
-       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       . = ALIGN(L1_CACHE_BYTES);
        .jiffies : AT(VLOAD(.jiffies)) {
                *(.jiffies)
        }
                __brk_limit = .;
        }
  
-       .end : AT(ADDR(.end) - LOAD_OFFSET) {
-               _end = .;
-       }
+       _end = .;
  
          STABS_DEBUG
          DWARF_DEBUG
  
  
  #ifdef CONFIG_X86_32
+ /*
+  * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+  */
  . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
           "kernel image bigger than KERNEL_IMAGE_SIZE");
  #else
   * Per-cpu symbols which need to be offset from __per_cpu_load
   * for the boot processor.
   */
 -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
 +#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
  INIT_PER_CPU(gdt_page);
  INIT_PER_CPU(irq_stack_union);
  
           "kernel image bigger than KERNEL_IMAGE_SIZE");
  
  #ifdef CONFIG_SMP
 -. = ASSERT((per_cpu__irq_stack_union == 0),
 +. = ASSERT((irq_stack_union == 0),
             "irq_stack_union is not at start of per-cpu area");
  #endif
  
diff --combined include/linux/compiler.h
index abba8045c6ef19b69e0dea7f625b52d2a816d94e,5be3dab4a69547bf6bb378a45d73e553166c5b12..a5a472b10746c662450059d8af969d58c7dcac6c
@@@ -5,7 -5,7 +5,7 @@@
  
  #ifdef __CHECKER__
  # define __user               __attribute__((noderef, address_space(1)))
 -# define __kernel     /* default address space */
 +# define __kernel     __attribute__((address_space(0)))
  # define __safe               __attribute__((safe))
  # define __force      __attribute__((force))
  # define __nocast     __attribute__((nocast))
@@@ -15,7 -15,6 +15,7 @@@
  # define __acquire(x) __context__(x,1)
  # define __release(x) __context__(x,-1)
  # define __cond_lock(x,c)     ((c) ? ({ __acquire(x); 1; }) : 0)
 +# define __percpu     __attribute__((noderef, address_space(3)))
  extern void __chk_user_ptr(const volatile void __user *);
  extern void __chk_io_ptr(const volatile void __iomem *);
  #else
@@@ -33,7 -32,6 +33,7 @@@
  # define __acquire(x) (void)0
  # define __release(x) (void)0
  # define __cond_lock(x,c) (c)
 +# define __percpu
  #endif
  
  #ifdef __KERNEL__
@@@ -146,6 -144,11 +146,11 @@@ void ftrace_likely_update(struct ftrace
  # define barrier() __memory_barrier()
  #endif
  
+ /* Unreachable code */
+ #ifndef unreachable
+ # define unreachable() do { } while (1)
+ #endif
  #ifndef RELOC_HIDE
  # define RELOC_HIDE(ptr, off)                                 \
    ({ unsigned long __ptr;                                     \
  # define __maybe_unused               /* unimplemented */
  #endif
  
+ #ifndef __always_unused
+ # define __always_unused      /* unimplemented */
+ #endif
  #ifndef noinline
  #define noinline
  #endif
  # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
  #endif
  
+ /* Compile time object size, -1 for unknown */
+ #ifndef __compiletime_object_size
+ # define __compiletime_object_size(obj) -1
+ #endif
+ #ifndef __compiletime_warning
+ # define __compiletime_warning(message)
+ #endif
+ #ifndef __compiletime_error
+ # define __compiletime_error(message)
+ #endif
  /*
   * Prevent the compiler from merging or refetching accesses.  The compiler
   * is also forbidden from reordering successive instances of ACCESS_ONCE(),
diff --combined include/linux/percpu.h
index 42878f0cd0e2212ee79f853832431b027d7c3062,cf5efbcf716c8cecf74d4d315e2619f6fdcfa1f4..a93e5bfdccb8e8b825006776f4afb77ebc975e90
   * we force a syntax error here if it isn't.
   */
  #define get_cpu_var(var) (*({                         \
 -      extern int simple_identifier_##var(void);       \
        preempt_disable();                              \
        &__get_cpu_var(var); }))
 -#define put_cpu_var(var) preempt_enable()
 +
 +/*
 + * The weird & is necessary because sparse considers (void)(var) to be
 + * a direct dereference of percpu variable (var).
 + */
 +#define put_cpu_var(var) do {                         \
 +      (void)&(var);                                   \
 +      preempt_enable();                               \
 +} while (0)
  
  #ifdef CONFIG_SMP
  
@@@ -134,9 -127,10 +134,10 @@@ extern int __init pcpu_page_first_chunk
   */
  #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
  
 -extern void *__alloc_reserved_percpu(size_t size, size_t align);
 -extern void *__alloc_percpu(size_t size, size_t align);
 -extern void free_percpu(void *__pdata);
 +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
 +extern void __percpu *__alloc_percpu(size_t size, size_t align);
 +extern void free_percpu(void __percpu *__pdata);
+ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
  
  #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
  extern void __init setup_per_cpu_areas(void);
  
  #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
  
 -static inline void *__alloc_percpu(size_t size, size_t align)
 +static inline void __percpu *__alloc_percpu(size_t size, size_t align)
  {
        /*
         * Can't easily make larger alignment work with kmalloc.  WARN
        return kzalloc(size, GFP_KERNEL);
  }
  
 -static inline void free_percpu(void *p)
 +static inline void free_percpu(void __percpu *p)
  {
        kfree(p);
  }
  
+ static inline phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+       return __pa(addr);
+ }
  static inline void __init setup_per_cpu_areas(void) { }
  
  static inline void *pcpu_lpage_remapped(void *kaddr)
  #endif /* CONFIG_SMP */
  
  #define alloc_percpu(type)    \
 -      (typeof(type) *)__alloc_percpu(sizeof(type), __alignof__(type))
 +      (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type))
  
  /*
   * Optional methods for optimized non-lvalue per-cpu variable access.
  #ifndef percpu_read
  # define percpu_read(var)                                             \
    ({                                                                  \
 -      typeof(per_cpu_var(var)) __tmp_var__;                           \
 -      __tmp_var__ = get_cpu_var(var);                                 \
 -      put_cpu_var(var);                                               \
 -      __tmp_var__;                                                    \
 +      typeof(var) *pr_ptr__ = &(var);                                 \
 +      typeof(var) pr_ret__;                                           \
 +      pr_ret__ = get_cpu_var(*pr_ptr__);                              \
 +      put_cpu_var(*pr_ptr__);                                         \
 +      pr_ret__;                                                       \
    })
  #endif
  
  #define __percpu_generic_to_op(var, val, op)                          \
  do {                                                                  \
 -      get_cpu_var(var) op val;                                        \
 -      put_cpu_var(var);                                               \
 +      typeof(var) *pgto_ptr__ = &(var);                               \
 +      get_cpu_var(*pgto_ptr__) op val;                                \
 +      put_cpu_var(*pgto_ptr__);                                       \
  } while (0)
  
  #ifndef percpu_write
@@@ -237,7 -234,6 +243,7 @@@ extern void __bad_size_call_parameter(v
  
  #define __pcpu_size_call_return(stem, variable)                               \
  ({    typeof(variable) pscr_ret__;                                    \
 +      __verify_pcpu_ptr(&(variable));                                 \
        switch(sizeof(variable)) {                                      \
        case 1: pscr_ret__ = stem##1(variable);break;                   \
        case 2: pscr_ret__ = stem##2(variable);break;                   \
  
  #define __pcpu_size_call(stem, variable, ...)                         \
  do {                                                                  \
 +      __verify_pcpu_ptr(&(variable));                                 \
        switch(sizeof(variable)) {                                      \
                case 1: stem##1(variable, __VA_ARGS__);break;           \
                case 2: stem##2(variable, __VA_ARGS__);break;           \
  
  /*
   * Optimized manipulation for memory allocated through the per cpu
 - * allocator or for addresses of per cpu variables (can be determined
 - * using per_cpu_var(xx).
 + * allocator or for addresses of per cpu variables.
   *
   * These operation guarantee exclusivity of access for other operations
   * on the *same* processor. The assumption is that per cpu data is only
  #define _this_cpu_generic_to_op(pcp, val, op)                         \
  do {                                                                  \
        preempt_disable();                                              \
 -      *__this_cpu_ptr(&pcp) op val;                                   \
 +      *__this_cpu_ptr(&(pcp)) op val;                                 \
        preempt_enable();                                               \
  } while (0)
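
The percpu.h changes above add the sparse __percpu annotation to the dynamic allocator and make the accessors take plain variables rather than per_cpu_var() wrappers. A hedged usage sketch against that interface; the hit_counter type and helper names are invented for illustration:

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>

struct hit_counter {
	unsigned long hits;
};

/* Dynamically allocated per-cpu data now carries __percpu for sparse. */
static struct hit_counter __percpu *counters;

static int counters_init(void)
{
	counters = alloc_percpu(struct hit_counter);
	return counters ? 0 : -ENOMEM;
}

static void counters_hit(void)
{
	/* Increment this CPU's copy; the this_cpu op handles preemption. */
	this_cpu_inc(counters->hits);
}

static unsigned long counters_total(void)
{
	unsigned long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu_ptr(counters, cpu)->hits;
	return sum;
}

static void counters_exit(void)
{
	free_percpu(counters);
}
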
  
diff --combined include/linux/vmstat.h
index 3e489fda11a1d4a5686efdd50a0f74018602a027,ee03bba9c5df8e9d0b0586fcfff5ef39e254c717..117f0dd8ad03fa3780b86b8feedbdbb1603c1576
@@@ -40,6 -40,8 +40,8 @@@ enum vm_event_item { PGPGIN, PGPGOUT, P
                PGSCAN_ZONE_RECLAIM_FAILED,
  #endif
                PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+               KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
+               KSWAPD_SKIP_CONGESTION_WAIT,
                PAGEOUTRUN, ALLOCSTALL, PGROTATED,
  #ifdef CONFIG_HUGETLB_PAGE
                HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
@@@ -76,22 -78,22 +78,22 @@@ DECLARE_PER_CPU(struct vm_event_state, 
  
  static inline void __count_vm_event(enum vm_event_item item)
  {
 -      __this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
 +      __this_cpu_inc(vm_event_states.event[item]);
  }
  
  static inline void count_vm_event(enum vm_event_item item)
  {
 -      this_cpu_inc(per_cpu_var(vm_event_states).event[item]);
 +      this_cpu_inc(vm_event_states.event[item]);
  }
  
  static inline void __count_vm_events(enum vm_event_item item, long delta)
  {
 -      __this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
 +      __this_cpu_add(vm_event_states.event[item], delta);
  }
  
  static inline void count_vm_events(enum vm_event_item item, long delta)
  {
 -      this_cpu_add(per_cpu_var(vm_event_states).event[item], delta);
 +      this_cpu_add(vm_event_states.event[item], delta);
  }
  
  extern void all_vm_events(unsigned long *);
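
The vmstat.h helpers show the same conversion for statically defined per-cpu variables: this_cpu_inc()/__this_cpu_add() now take the variable by name, with no per_cpu__ prefix or per_cpu_var() wrapper. A small sketch of that style, using a made-up event structure rather than the real vm_event_states:

#include <linux/percpu.h>

enum my_event_item { MY_EV_ALLOC, MY_EV_FREE, MY_NR_EVENTS };

struct my_event_state {
	unsigned long event[MY_NR_EVENTS];
};

static DEFINE_PER_CPU(struct my_event_state, my_event_states);

/* Preemption-safe variant. */
static inline void count_my_event(enum my_event_item item)
{
	this_cpu_inc(my_event_states.event[item]);
}

/* Caller guarantees preemption is already disabled. */
static inline void __count_my_events(enum my_event_item item, long delta)
{
	__this_cpu_add(my_event_states.event[item], delta);
}
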
diff --combined kernel/rcutorture.c
index e339ab34912144c36ba8a0efd4e48b45bebd91d4,9bb52177af02a3e20aa347e3b65c0a236caa1922..0b5217535f71b57e905fab1caaf26923a241593d
@@@ -327,6 -327,11 +327,11 @@@ rcu_torture_cb(struct rcu_head *p
                cur_ops->deferred_free(rp);
  }
  
+ static int rcu_no_completed(void)
+ {
+       return 0;
+ }
  static void rcu_torture_deferred_free(struct rcu_torture *p)
  {
        call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@@ -388,6 -393,21 +393,21 @@@ static struct rcu_torture_ops rcu_sync_
        .name           = "rcu_sync"
  };
  
+ static struct rcu_torture_ops rcu_expedited_ops = {
+       .init           = rcu_sync_torture_init,
+       .cleanup        = NULL,
+       .readlock       = rcu_torture_read_lock,
+       .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
+       .readunlock     = rcu_torture_read_unlock,
+       .completed      = rcu_no_completed,
+       .deferred_free  = rcu_sync_torture_deferred_free,
+       .sync           = synchronize_rcu_expedited,
+       .cb_barrier     = NULL,
+       .stats          = NULL,
+       .irq_capable    = 1,
+       .name           = "rcu_expedited"
+ };
  /*
   * Definitions for rcu_bh torture testing.
   */
@@@ -547,6 -567,25 +567,25 @@@ static struct rcu_torture_ops srcu_ops 
        .name           = "srcu"
  };
  
+ static void srcu_torture_synchronize_expedited(void)
+ {
+       synchronize_srcu_expedited(&srcu_ctl);
+ }
+ static struct rcu_torture_ops srcu_expedited_ops = {
+       .init           = srcu_torture_init,
+       .cleanup        = srcu_torture_cleanup,
+       .readlock       = srcu_torture_read_lock,
+       .read_delay     = srcu_read_delay,
+       .readunlock     = srcu_torture_read_unlock,
+       .completed      = srcu_torture_completed,
+       .deferred_free  = rcu_sync_torture_deferred_free,
+       .sync           = srcu_torture_synchronize_expedited,
+       .cb_barrier     = NULL,
+       .stats          = srcu_torture_stats,
+       .name           = "srcu_expedited"
+ };
  /*
   * Definitions for sched torture testing.
   */
@@@ -562,11 -601,6 +601,6 @@@ static void sched_torture_read_unlock(i
        preempt_enable();
  }
  
- static int sched_torture_completed(void)
- {
-       return 0;
- }
  static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
  {
        call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@@ -583,7 -617,7 +617,7 @@@ static struct rcu_torture_ops sched_op
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
-       .completed      = sched_torture_completed,
+       .completed      = rcu_no_completed,
        .deferred_free  = rcu_sched_torture_deferred_free,
        .sync           = sched_torture_synchronize,
        .cb_barrier     = rcu_barrier_sched,
        .name           = "sched"
  };
  
- static struct rcu_torture_ops sched_ops_sync = {
+ static struct rcu_torture_ops sched_sync_ops = {
        .init           = rcu_sync_torture_init,
        .cleanup        = NULL,
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
-       .completed      = sched_torture_completed,
+       .completed      = rcu_no_completed,
        .deferred_free  = rcu_sync_torture_deferred_free,
        .sync           = sched_torture_synchronize,
        .cb_barrier     = NULL,
        .name           = "sched_sync"
  };
  
- extern int rcu_expedited_torture_stats(char *page);
  static struct rcu_torture_ops sched_expedited_ops = {
        .init           = rcu_sync_torture_init,
        .cleanup        = NULL,
        .readlock       = sched_torture_read_lock,
        .read_delay     = rcu_read_delay,  /* just reuse rcu's version. */
        .readunlock     = sched_torture_read_unlock,
-       .completed      = sched_torture_completed,
+       .completed      = rcu_no_completed,
        .deferred_free  = rcu_sync_torture_deferred_free,
        .sync           = synchronize_sched_expedited,
        .cb_barrier     = NULL,
@@@ -650,7 -682,7 +682,7 @@@ rcu_torture_writer(void *arg
                old_rp = rcu_torture_current;
                rp->rtort_mbtest = 1;
                rcu_assign_pointer(rcu_torture_current, rp);
-               smp_wmb();
+               smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */
                if (old_rp) {
                        i = old_rp->rtort_pipe_count;
                        if (i > RCU_TORTURE_PIPE_LEN)
@@@ -731,13 -763,13 +763,13 @@@ static void rcu_torture_timer(unsigned 
                /* Should not happen, but... */
                pipe_count = RCU_TORTURE_PIPE_LEN;
        }
 -      __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
 +      __this_cpu_inc(rcu_torture_count[pipe_count]);
        completed = cur_ops->completed() - completed;
        if (completed > RCU_TORTURE_PIPE_LEN) {
                /* Should not happen, but... */
                completed = RCU_TORTURE_PIPE_LEN;
        }
 -      __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
 +      __this_cpu_inc(rcu_torture_batch[completed]);
        preempt_enable();
        cur_ops->readunlock(idx);
  }
@@@ -786,13 -818,13 +818,13 @@@ rcu_torture_reader(void *arg
                        /* Should not happen, but... */
                        pipe_count = RCU_TORTURE_PIPE_LEN;
                }
 -              __this_cpu_inc(per_cpu_var(rcu_torture_count)[pipe_count]);
 +              __this_cpu_inc(rcu_torture_count[pipe_count]);
                completed = cur_ops->completed() - completed;
                if (completed > RCU_TORTURE_PIPE_LEN) {
                        /* Should not happen, but... */
                        completed = RCU_TORTURE_PIPE_LEN;
                }
 -              __this_cpu_inc(per_cpu_var(rcu_torture_batch)[completed]);
 +              __this_cpu_inc(rcu_torture_batch[completed]);
                preempt_enable();
                cur_ops->readunlock(idx);
                schedule();
@@@ -1099,9 -1131,10 +1131,10 @@@ rcu_torture_init(void
        int cpu;
        int firsterr = 0;
        static struct rcu_torture_ops *torture_ops[] =
-               { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
-                 &sched_expedited_ops,
-                 &srcu_ops, &sched_ops, &sched_ops_sync, };
+               { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
+                 &rcu_bh_ops, &rcu_bh_sync_ops,
+                 &srcu_ops, &srcu_expedited_ops,
+                 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
  
        mutex_lock(&fullstop_mutex);
  
                        break;
        }
        if (i == ARRAY_SIZE(torture_ops)) {
-               printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n",
+               printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n",
                       torture_type);
+               printk(KERN_ALERT "rcu-torture types:");
+               for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
+                       printk(KERN_ALERT " %s", torture_ops[i]->name);
+               printk(KERN_ALERT "\n");
                mutex_unlock(&fullstop_mutex);
                return -EINVAL;
        }
diff --combined kernel/trace/trace.c
index b808177af8168299b28fec9fe8d70bd9767936b0,0df1b0f2cb9e0717f2a21f6923389c1978a1fa04..ab2bbb0e942958e70812b43a24014255ea00b163
@@@ -12,7 -12,7 +12,7 @@@
   *  Copyright (C) 2004 William Lee Irwin III
   */
  #include <linux/ring_buffer.h>
- #include <linux/utsrelease.h>
+ #include <generated/utsrelease.h>
  #include <linux/stacktrace.h>
  #include <linux/writeback.h>
  #include <linux/kallsyms.h>
@@@ -91,12 -91,12 +91,12 @@@ DEFINE_PER_CPU(int, ftrace_cpu_disabled
  static inline void ftrace_disable_cpu(void)
  {
        preempt_disable();
 -      __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled));
 +      __this_cpu_inc(ftrace_cpu_disabled);
  }
  
  static inline void ftrace_enable_cpu(void)
  {
 -      __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled));
 +      __this_cpu_dec(ftrace_cpu_disabled);
        preempt_enable();
  }
  
@@@ -129,7 -129,7 +129,7 @@@ static int tracing_set_tracer(const cha
  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
  static char *default_bootup_tracer;
  
- static int __init set_ftrace(char *str)
+ static int __init set_cmdline_ftrace(char *str)
  {
        strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
        default_bootup_tracer = bootup_tracer_buf;
        ring_buffer_expanded = 1;
        return 1;
  }
- __setup("ftrace=", set_ftrace);
+ __setup("ftrace=", set_cmdline_ftrace);
  
  static int __init set_ftrace_dump_on_oops(char *str)
  {
@@@ -313,7 -313,6 +313,6 @@@ static const char *trace_options[] = 
        "bin",
        "block",
        "stacktrace",
-       "sched-tree",
        "trace_printk",
        "ftrace_preempt",
        "branch",
@@@ -493,15 -492,15 +492,15 @@@ static ssize_t trace_seq_to_buffer(stru
   * protected by per_cpu spinlocks. But the action of the swap
   * needs its own lock.
   *
-  * This is defined as a raw_spinlock_t in order to help
+  * This is defined as a arch_spinlock_t in order to help
   * with performance when lockdep debugging is enabled.
   *
   * It is also used in other places outside the update_max_tr
   * so it needs to be defined outside of the
   * CONFIG_TRACER_MAX_TRACE.
   */
- static raw_spinlock_t ftrace_max_lock =
-       (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t ftrace_max_lock =
+       (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
  
  #ifdef CONFIG_TRACER_MAX_TRACE
  unsigned long __read_mostly   tracing_max_latency;
@@@ -555,13 -554,13 +554,13 @@@ update_max_tr(struct trace_array *tr, s
                return;
  
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
  
        tr->buffer = max_tr.buffer;
        max_tr.buffer = buf;
  
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
  }
  
  /**
@@@ -581,7 -580,7 +580,7 @@@ update_max_tr_single(struct trace_arra
                return;
  
        WARN_ON_ONCE(!irqs_disabled());
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
  
        ftrace_disable_cpu();
  
        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
  
        __update_max_tr(tr, tsk, cpu);
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
  }
  #endif /* CONFIG_TRACER_MAX_TRACE */
  
@@@ -802,7 -801,7 +801,7 @@@ static unsigned map_pid_to_cmdline[PID_
  static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
  static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
  static int cmdline_idx;
- static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
+ static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
  
  /* temporary disable recording */
  static atomic_t trace_record_cmdline_disabled __read_mostly;
@@@ -915,7 -914,7 +914,7 @@@ static void trace_save_cmdline(struct t
         * nor do we want to disable interrupts,
         * so if we miss here, then better luck next time.
         */
-       if (!__raw_spin_trylock(&trace_cmdline_lock))
+       if (!arch_spin_trylock(&trace_cmdline_lock))
                return;
  
        idx = map_pid_to_cmdline[tsk->pid];
  
        memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
  
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
  }
  
  void trace_find_cmdline(int pid, char comm[])
        }
  
        preempt_disable();
-       __raw_spin_lock(&trace_cmdline_lock);
+       arch_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
        if (map != NO_CMDLINE_MAP)
                strcpy(comm, saved_cmdlines[map]);
        else
                strcpy(comm, "<...>");
  
-       __raw_spin_unlock(&trace_cmdline_lock);
+       arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
  }
  
@@@ -1085,7 -1084,7 +1084,7 @@@ trace_function(struct trace_array *tr
        struct ftrace_entry *entry;
  
        /* If we are reading the ring buffer, don't trace */
 -      if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
 +      if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                return;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@@ -1151,6 -1150,22 +1150,22 @@@ void __trace_stack(struct trace_array *
        __ftrace_trace_stack(tr->buffer, flags, skip, pc);
  }
  
+ /**
+  * trace_dump_stack - record a stack back trace in the trace buffer
+  */
+ void trace_dump_stack(void)
+ {
+       unsigned long flags;
+       if (tracing_disabled || tracing_selftest_running)
+               return;
+       local_save_flags(flags);
+       /* skipping 3 traces, seems to get us at the caller of this function */
+       __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
+ }
  void
  ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
  {
@@@ -1251,8 -1266,8 +1266,8 @@@ ftrace_special(unsigned long arg1, unsi
   */
  int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
  {
-       static raw_spinlock_t trace_buf_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
  
        struct ftrace_event_call *call = &event_bprint;
  
        /* Lockdep uses trace_printk for lock tracing */
        local_irq_save(flags);
-       __raw_spin_lock(&trace_buf_lock);
+       arch_spin_lock(&trace_buf_lock);
        len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
  
        if (len > TRACE_BUF_SIZE || len < 0)
                ring_buffer_unlock_commit(buffer, event);
  
  out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);
  
  out:
@@@ -1334,7 -1349,7 +1349,7 @@@ int trace_array_printk(struct trace_arr
  int trace_array_vprintk(struct trace_array *tr,
                        unsigned long ip, const char *fmt, va_list args)
  {
-       static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
  
        struct ftrace_event_call *call = &event_print;
  
        pause_graph_tracing();
        raw_local_irq_save(irq_flags);
-       __raw_spin_lock(&trace_buf_lock);
+       arch_spin_lock(&trace_buf_lock);
        len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
  
-       len = min(len, TRACE_BUF_SIZE-1);
-       trace_buf[len] = 0;
        size = sizeof(*entry) + len + 1;
        buffer = tr->buffer;
        event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
        if (!event)
                goto out_unlock;
        entry = ring_buffer_event_data(event);
-       entry->ip                       = ip;
+       entry->ip = ip;
  
        memcpy(&entry->buf, trace_buf, len);
-       entry->buf[len] = 0;
+       entry->buf[len] = '\0';
        if (!filter_check_discard(call, entry, buffer, event))
                ring_buffer_unlock_commit(buffer, event);
  
   out_unlock:
-       __raw_spin_unlock(&trace_buf_lock);
+       arch_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
   out:
  
  int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
  {
-       return trace_array_printk(&global_trace, ip, fmt, args);
+       return trace_array_vprintk(&global_trace, ip, fmt, args);
  }
  EXPORT_SYMBOL_GPL(trace_vprintk);
  
@@@ -1515,6 -1527,8 +1527,8 @@@ static void *s_next(struct seq_file *m
        int i = (int)*pos;
        void *ent;
  
+       WARN_ON_ONCE(iter->leftover);
        (*pos)++;
  
        /* can't go backwards */
@@@ -1613,8 -1627,16 +1627,16 @@@ static void *s_start(struct seq_file *m
                        ;
  
        } else {
-               l = *pos - 1;
-               p = s_next(m, p, &l);
+               /*
+                * If we overflowed the seq_file before, then we want
+                * to just reuse the trace_seq buffer again.
+                */
+               if (iter->leftover)
+                       p = iter;
+               else {
+                       l = *pos - 1;
+                       p = s_next(m, p, &l);
+               }
        }
  
        trace_event_read_lock();
@@@ -1922,6 -1944,7 +1944,7 @@@ static enum print_line_t print_trace_li
  static int s_show(struct seq_file *m, void *v)
  {
        struct trace_iterator *iter = v;
+       int ret;
  
        if (iter->ent == NULL) {
                if (iter->tr) {
                        if (!(trace_flags & TRACE_ITER_VERBOSE))
                                print_func_help_header(m);
                }
+       } else if (iter->leftover) {
+               /*
+                * If we filled the seq_file buffer earlier, we
+                * want to just show it now.
+                */
+               ret = trace_print_seq(m, &iter->seq);
+               /* ret should this time be zero, but you never know */
+               iter->leftover = ret;
        } else {
                print_trace_line(iter);
-               trace_print_seq(m, &iter->seq);
+               ret = trace_print_seq(m, &iter->seq);
+               /*
+                * If we overflow the seq_file buffer, then it will
+                * ask us for this data again at start up.
+                * Use that instead.
+                *  ret is 0 if seq_file write succeeded.
+                *        -1 otherwise.
+                */
+               iter->leftover = ret;
        }
  
        return 0;
@@@ -2253,7 -2294,7 +2294,7 @@@ tracing_cpumask_write(struct file *filp
        mutex_lock(&tracing_cpumask_update_lock);
  
        local_irq_disable();
-       __raw_spin_lock(&ftrace_max_lock);
+       arch_spin_lock(&ftrace_max_lock);
        for_each_tracing_cpu(cpu) {
                /*
                 * Increase/decrease the disabled counter if we are
                        atomic_dec(&global_trace.data[cpu]->disabled);
                }
        }
-       __raw_spin_unlock(&ftrace_max_lock);
+       arch_spin_unlock(&ftrace_max_lock);
        local_irq_enable();
  
        cpumask_copy(tracing_cpumask, tracing_cpumask_new);
@@@ -2290,67 -2331,49 +2331,49 @@@ static const struct file_operations tra
        .write          = tracing_cpumask_write,
  };
  
- static ssize_t
- tracing_trace_options_read(struct file *filp, char __user *ubuf,
-                      size_t cnt, loff_t *ppos)
+ static int tracing_trace_options_show(struct seq_file *m, void *v)
  {
        struct tracer_opt *trace_opts;
        u32 tracer_flags;
-       int len = 0;
-       char *buf;
-       int r = 0;
        int i;
  
-       /* calculate max size */
-       for (i = 0; trace_options[i]; i++) {
-               len += strlen(trace_options[i]);
-               len += 3; /* "no" and newline */
-       }
        mutex_lock(&trace_types_lock);
        tracer_flags = current_trace->flags->val;
        trace_opts = current_trace->flags->opts;
  
-       /*
-        * Increase the size with names of options specific
-        * of the current tracer.
-        */
-       for (i = 0; trace_opts[i].name; i++) {
-               len += strlen(trace_opts[i].name);
-               len += 3; /* "no" and newline */
-       }
-       /* +1 for \0 */
-       buf = kmalloc(len + 1, GFP_KERNEL);
-       if (!buf) {
-               mutex_unlock(&trace_types_lock);
-               return -ENOMEM;
-       }
        for (i = 0; trace_options[i]; i++) {
                if (trace_flags & (1 << i))
-                       r += sprintf(buf + r, "%s\n", trace_options[i]);
+                       seq_printf(m, "%s\n", trace_options[i]);
                else
-                       r += sprintf(buf + r, "no%s\n", trace_options[i]);
+                       seq_printf(m, "no%s\n", trace_options[i]);
        }
  
        for (i = 0; trace_opts[i].name; i++) {
                if (tracer_flags & trace_opts[i].bit)
-                       r += sprintf(buf + r, "%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "%s\n", trace_opts[i].name);
                else
-                       r += sprintf(buf + r, "no%s\n",
-                               trace_opts[i].name);
+                       seq_printf(m, "no%s\n", trace_opts[i].name);
        }
        mutex_unlock(&trace_types_lock);
  
-       WARN_ON(r >= len + 1);
+       return 0;
+ }
  
-       r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ static int __set_tracer_option(struct tracer *trace,
+                              struct tracer_flags *tracer_flags,
+                              struct tracer_opt *opts, int neg)
+ {
+       int ret;
  
-       kfree(buf);
-       return r;
+       ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
+       if (ret)
+               return ret;
+       if (neg)
+               tracer_flags->val &= ~opts->bit;
+       else
+               tracer_flags->val |= opts->bit;
+       return 0;
  }
  
  /* Try to assign a tracer specific option */
@@@ -2358,33 -2381,17 +2381,17 @@@ static int set_tracer_option(struct tra
  {
        struct tracer_flags *tracer_flags = trace->flags;
        struct tracer_opt *opts = NULL;
-       int ret = 0, i = 0;
-       int len;
+       int i;
  
        for (i = 0; tracer_flags->opts[i].name; i++) {
                opts = &tracer_flags->opts[i];
-               len = strlen(opts->name);
  
-               if (strncmp(cmp, opts->name, len) == 0) {
-                       ret = trace->set_flag(tracer_flags->val,
-                               opts->bit, !neg);
-                       break;
-               }
+               if (strcmp(cmp, opts->name) == 0)
+                       return __set_tracer_option(trace, trace->flags,
+                                                  opts, neg);
        }
-       /* Not found */
-       if (!tracer_flags->opts[i].name)
-               return -EINVAL;
-       /* Refused to handle */
-       if (ret)
-               return ret;
-       if (neg)
-               tracer_flags->val &= ~opts->bit;
-       else
-               tracer_flags->val |= opts->bit;
  
-       return 0;
+       return -EINVAL;
  }
  
  static void set_tracer_flags(unsigned int mask, int enabled)
@@@ -2404,7 -2411,7 +2411,7 @@@ tracing_trace_options_write(struct fil
                        size_t cnt, loff_t *ppos)
  {
        char buf[64];
-       char *cmp = buf;
+       char *cmp;
        int neg = 0;
        int ret;
        int i;
                return -EFAULT;
  
        buf[cnt] = 0;
+       cmp = strstrip(buf);
  
-       if (strncmp(buf, "no", 2) == 0) {
+       if (strncmp(cmp, "no", 2) == 0) {
                neg = 1;
                cmp += 2;
        }
  
        for (i = 0; trace_options[i]; i++) {
-               int len = strlen(trace_options[i]);
-               if (strncmp(cmp, trace_options[i], len) == 0) {
+               if (strcmp(cmp, trace_options[i]) == 0) {
                        set_tracer_flags(1 << i, !neg);
                        break;
                }
                        return ret;
        }
  
-       filp->f_pos += cnt;
+       *ppos += cnt;
  
        return cnt;
  }
  
+ static int tracing_trace_options_open(struct inode *inode, struct file *file)
+ {
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_trace_options_show, NULL);
+ }
  static const struct file_operations tracing_iter_fops = {
-       .open           = tracing_open_generic,
-       .read           = tracing_trace_options_read,
+       .open           = tracing_trace_options_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
        .write          = tracing_trace_options_write,
  };
  
@@@ -2582,7 -2597,7 +2597,7 @@@ tracing_ctrl_write(struct file *filp, c
        }
        mutex_unlock(&trace_types_lock);
  
-       filp->f_pos += cnt;
+       *ppos += cnt;
  
        return cnt;
  }
@@@ -2764,7 -2779,7 +2779,7 @@@ tracing_set_trace_write(struct file *fi
        if (err)
                return err;
  
-       filp->f_pos += ret;
+       *ppos += ret;
  
        return ret;
  }
@@@ -2897,6 -2912,10 +2912,10 @@@ static int tracing_release_pipe(struct 
        else
                cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
  
+       if (iter->trace->pipe_close)
+               iter->trace->pipe_close(iter);
        mutex_unlock(&trace_types_lock);
  
        free_cpumask_var(iter->started);
@@@ -3103,7 -3122,7 +3122,7 @@@ static void tracing_spd_release_pipe(st
        __free_page(spd->pages[idx]);
  }
  
- static struct pipe_buf_operations tracing_pipe_buf_ops = {
+ static const struct pipe_buf_operations tracing_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@@ -3299,7 -3318,7 +3318,7 @@@ tracing_entries_write(struct file *filp
                }
        }
  
-       filp->f_pos += cnt;
+       *ppos += cnt;
  
        /* If check pages failed, return ENOMEM */
        if (tracing_disabled)
@@@ -3334,7 -3353,6 +3353,6 @@@ tracing_mark_write(struct file *filp, c
                                        size_t cnt, loff_t *fpos)
  {
        char *buf;
-       char *end;
  
        if (tracing_disabled)
                return -EINVAL;
        if (cnt > TRACE_BUF_SIZE)
                cnt = TRACE_BUF_SIZE;
  
-       buf = kmalloc(cnt + 1, GFP_KERNEL);
+       buf = kmalloc(cnt + 2, GFP_KERNEL);
        if (buf == NULL)
                return -ENOMEM;
  
                kfree(buf);
                return -EFAULT;
        }
+       if (buf[cnt-1] != '\n') {
+               buf[cnt] = '\n';
+               buf[cnt+1] = '\0';
+       } else
+               buf[cnt] = '\0';
  
-       /* Cut from the first nil or newline. */
-       buf[cnt] = '\0';
-       end = strchr(buf, '\n');
-       if (end)
-               *end = '\0';
-       cnt = mark_printk("%s\n", buf);
+       cnt = mark_printk("%s", buf);
        kfree(buf);
        *fpos += cnt;
  
        return cnt;
  }
  
- static ssize_t tracing_clock_read(struct file *filp, char __user *ubuf,
-                                 size_t cnt, loff_t *ppos)
+ static int tracing_clock_show(struct seq_file *m, void *v)
  {
-       char buf[64];
-       int bufiter = 0;
        int i;
  
        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
-               bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter,
+               seq_printf(m,
                        "%s%s%s%s", i ? " " : "",
                        i == trace_clock_id ? "[" : "", trace_clocks[i].name,
                        i == trace_clock_id ? "]" : "");
-       bufiter += snprintf(buf + bufiter, sizeof(buf) - bufiter, "\n");
+       seq_putc(m, '\n');
  
-       return simple_read_from_buffer(ubuf, cnt, ppos, buf, bufiter);
+       return 0;
  }
  
  static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
        return cnt;
  }
  
+ static int tracing_clock_open(struct inode *inode, struct file *file)
+ {
+       if (tracing_disabled)
+               return -ENODEV;
+       return single_open(file, tracing_clock_show, NULL);
+ }
  static const struct file_operations tracing_max_lat_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_max_lat_read,
@@@ -3458,8 -3479,10 +3479,10 @@@ static const struct file_operations tra
  };
  
  static const struct file_operations trace_clock_fops = {
-       .open           = tracing_open_generic,
-       .read           = tracing_clock_read,
+       .open           = tracing_clock_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
        .write          = tracing_clock_write,
  };
  
@@@ -3589,7 -3612,7 +3612,7 @@@ static void buffer_pipe_buf_get(struct 
  }
  
  /* Pipe buffer operations for a buffer. */
- static struct pipe_buf_operations buffer_pipe_buf_ops = {
+ static const struct pipe_buf_operations buffer_pipe_buf_ops = {
        .can_merge              = 0,
        .map                    = generic_pipe_buf_map,
        .unmap                  = generic_pipe_buf_unmap,
@@@ -3730,7 -3753,7 +3753,7 @@@ tracing_stats_read(struct file *filp, c
  
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (!s)
-               return ENOMEM;
+               return -ENOMEM;
  
        trace_seq_init(s);
  
@@@ -3920,39 -3943,16 +3943,16 @@@ trace_options_write(struct file *filp, 
        if (ret < 0)
                return ret;
  
-       ret = 0;
-       switch (val) {
-       case 0:
-               /* do nothing if already cleared */
-               if (!(topt->flags->val & topt->opt->bit))
-                       break;
-               mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 0);
-               mutex_unlock(&trace_types_lock);
-               if (ret)
-                       return ret;
-               topt->flags->val &= ~topt->opt->bit;
-               break;
-       case 1:
-               /* do nothing if already set */
-               if (topt->flags->val & topt->opt->bit)
-                       break;
+       if (val != 0 && val != 1)
+               return -EINVAL;
  
+       if (!!(topt->flags->val & topt->opt->bit) != val) {
                mutex_lock(&trace_types_lock);
-               if (current_trace->set_flag)
-                       ret = current_trace->set_flag(topt->flags->val,
-                                                     topt->opt->bit, 1);
+               ret = __set_tracer_option(current_trace, topt->flags,
+                                         topt->opt, !val);
                mutex_unlock(&trace_types_lock);
                if (ret)
                        return ret;
-               topt->flags->val |= topt->opt->bit;
-               break;
-       default:
-               return -EINVAL;
        }
  
        *ppos += cnt;
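
For reference, a small userspace C sketch of the simplified flag handling above: normalise the current bit state with !! so it compares directly against the 0/1 value the user wrote, and only invoke the update callback when the state actually changes (the kernel helper takes a "neg" argument, hence the !val in the hunk). The names opts, OPT_BIT, set_flag_cb and write_option are hypothetical stand-ins, not the tracer's API.

#include <stdio.h>

#define OPT_BIT 0x04                    /* the option bit being toggled */

static unsigned int opts;               /* current flag word */

/* stand-in for a tracer's set_flag callback */
static int set_flag_cb(unsigned int old, unsigned int bit, int set)
{
        printf("set_flag: bit 0x%x -> %d\n", bit, set);
        return 0;
}

static int write_option(unsigned int val)
{
        int ret;

        if (val != 0 && val != 1)
                return -1;              /* -EINVAL in the kernel version */

        /* !! folds any non-zero bit state down to 1 for the comparison */
        if (!!(opts & OPT_BIT) != val) {
                ret = set_flag_cb(opts, OPT_BIT, val);
                if (ret)
                        return ret;
                if (val)
                        opts |= OPT_BIT;
                else
                        opts &= ~OPT_BIT;
        }
        return 0;
}

int main(void)
{
        write_option(1);                /* sets the bit, callback fires */
        write_option(1);                /* no change, callback skipped  */
        write_option(0);                /* clears the bit               */
        return 0;
}
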
@@@ -4279,8 -4279,8 +4279,8 @@@ trace_printk_seq(struct trace_seq *s
  
  static void __ftrace_dump(bool disable_tracing)
  {
-       static raw_spinlock_t ftrace_dump_lock =
-               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+       static arch_spinlock_t ftrace_dump_lock =
+               (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
        unsigned int old_userobj;
  
        /* only one dump */
        local_irq_save(flags);
-       __raw_spin_lock(&ftrace_dump_lock);
+       arch_spin_lock(&ftrace_dump_lock);
        if (dump_ran)
                goto out;
  
        }
  
   out:
-       __raw_spin_unlock(&ftrace_dump_lock);
+       arch_spin_unlock(&ftrace_dump_lock);
        local_irq_restore(flags);
  }
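
The lock rename above follows the renaming in the 2.6.33 cycle, where the low-level raw_spinlock_t/__raw_spin_lock() primitives became arch_spinlock_t/arch_spin_lock(). A hedged kernel-style sketch of the same "run the dump only once" shape, with an invented function name (my_dump_once); arch_spin_lock() does not disable interrupts itself, which is why IRQs are masked around it, exactly as in the code above.

#include <linux/spinlock.h>

static void my_dump_once(void)
{
        static arch_spinlock_t dump_lock =
                (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
        static int dump_ran;
        unsigned long flags;

        local_irq_save(flags);
        arch_spin_lock(&dump_lock);
        if (dump_ran)
                goto out;
        dump_ran = 1;

        /* ... emit the dump exactly once here ... */

 out:
        arch_spin_unlock(&dump_lock);
        local_irq_restore(flags);
}
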
  
index 8614e3241ff86e5cffb7afd57887adfe525c1412,b1342c5d37cfb821cfb96fcfd610cb95cdfb1082..9d976f3249a3e044a38c0633eeb73a882bbbd828
  #include "trace.h"
  #include "trace_output.h"
  
- struct fgraph_data {
+ struct fgraph_cpu_data {
        pid_t           last_pid;
        int             depth;
+       int             ignore;
+ };
+
+ struct fgraph_data {
+       struct fgraph_cpu_data          *cpu_data;
+       /* Place to preserve last processed entry. */
+       struct ftrace_graph_ent_entry   ent;
+       struct ftrace_graph_ret_entry   ret;
+       int                             failed;
+       int                             cpu;
  };
  
  #define TRACE_GRAPH_INDENT    2
@@@ -176,7 -187,7 +187,7 @@@ static int __trace_graph_entry(struct t
        struct ring_buffer *buffer = tr->buffer;
        struct ftrace_graph_ent_entry *entry;
  
 -      if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
 +      if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                return 0;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@@ -240,7 -251,7 +251,7 @@@ static void __trace_graph_return(struc
        struct ring_buffer *buffer = tr->buffer;
        struct ftrace_graph_ret_entry *entry;
  
 -      if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled))))
 +      if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
                return;
  
        event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@@ -384,7 -395,7 +395,7 @@@ verif_pid(struct trace_seq *s, pid_t pi
        if (!data)
                return TRACE_TYPE_HANDLED;
  
-       last_pid = &(per_cpu_ptr(data, cpu)->last_pid);
+       last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
  
        if (*last_pid == pid)
                return TRACE_TYPE_HANDLED;
@@@ -435,26 -446,49 +446,49 @@@ static struct ftrace_graph_ret_entry 
  get_return_for_leaf(struct trace_iterator *iter,
                struct ftrace_graph_ent_entry *curr)
  {
-       struct ring_buffer_iter *ring_iter;
+       struct fgraph_data *data = iter->private;
+       struct ring_buffer_iter *ring_iter = NULL;
        struct ring_buffer_event *event;
        struct ftrace_graph_ret_entry *next;
  
-       ring_iter = iter->buffer_iter[iter->cpu];
+       /*
+        * If the previous output failed to write to the seq buffer,
+        * then we just reuse the data from before.
+        */
+       if (data && data->failed) {
+               curr = &data->ent;
+               next = &data->ret;
+       } else {
  
-       /* First peek to compare current entry and the next one */
-       if (ring_iter)
-               event = ring_buffer_iter_peek(ring_iter, NULL);
-       else {
-       /* We need to consume the current entry to see the next one */
-               ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-               event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
-                                       NULL);
-       }
+               ring_iter = iter->buffer_iter[iter->cpu];
+               /* First peek to compare current entry and the next one */
+               if (ring_iter)
+                       event = ring_buffer_iter_peek(ring_iter, NULL);
+               else {
+                       /*
+                        * We need to consume the current entry to see
+                        * the next one.
+                        */
+                       ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+                       event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+                                                NULL);
+               }
  
-       if (!event)
-               return NULL;
+               if (!event)
+                       return NULL;
+               next = ring_buffer_event_data(event);
  
-       next = ring_buffer_event_data(event);
+               if (data) {
+                       /*
+                        * Save current and next entries for later reference
+                        * if the output fails.
+                        */
+                       data->ent = *curr;
+                       data->ret = *next;
+               }
+       }
  
        if (next->ent.type != TRACE_GRAPH_RET)
                return NULL;
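
A userspace sketch of the asymmetry handled above: with a dedicated ring-buffer iterator the following entry can simply be peeked, but on the consuming (trace_pipe) path the current entry must be consumed before the next one becomes visible, which is why the kernel code caches the (curr, next) pair for a possible replay. The toy ring/ring_iter types below are illustrative only.

#include <stdio.h>
#include <stddef.h>

/* toy ring: head is the current, not yet consumed, entry */
struct ring { const int *buf; size_t len, head; };

/* separate iterator cursor, already positioned past the current entry */
struct ring_iter { const struct ring *r; size_t pos; };

static const int *iter_peek(const struct ring_iter *it)
{
        return it->pos < it->r->len ? &it->r->buf[it->pos] : NULL;
}

static void ring_consume(struct ring *r)
{
        if (r->head < r->len)
                r->head++;
}

static const int *ring_peek(const struct ring *r)
{
        return r->head < r->len ? &r->buf[r->head] : NULL;
}

/*
 * Look at the entry following the current one.  With an iterator this is a
 * plain peek; without one the current entry has to be consumed first, so the
 * caller had better save what it still needs before the output can fail.
 */
static const int *peek_following(struct ring *r, struct ring_iter *it)
{
        if (it)
                return iter_peek(it);
        ring_consume(r);                /* destructive: curr is gone now */
        return ring_peek(r);
}

int main(void)
{
        static const int log[] = { 10, 20, 30 };
        struct ring r = { log, 3, 0 };          /* current entry: 10 */
        const int *next = peek_following(&r, NULL);

        printf("entry after the current one: %d\n", next ? *next : -1);
        return 0;
}
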
@@@ -640,7 -674,7 +674,7 @@@ print_graph_entry_leaf(struct trace_ite
  
        if (data) {
                int cpu = iter->cpu;
-               int *depth = &(per_cpu_ptr(data, cpu)->depth);
+               int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                /*
                 * Comments display at + 1 to depth. Since
@@@ -688,7 -722,7 +722,7 @@@ print_graph_entry_nested(struct trace_i
  
        if (data) {
                int cpu = iter->cpu;
-               int *depth = &(per_cpu_ptr(data, cpu)->depth);
+               int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                *depth = call->depth;
        }
@@@ -782,19 -816,34 +816,34 @@@ static enum print_line_
  print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
                        struct trace_iterator *iter)
  {
-       int cpu = iter->cpu;
+       struct fgraph_data *data = iter->private;
        struct ftrace_graph_ent *call = &field->graph_ent;
        struct ftrace_graph_ret_entry *leaf_ret;
+       static enum print_line_t ret;
+       int cpu = iter->cpu;
  
        if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func))
                return TRACE_TYPE_PARTIAL_LINE;
  
        leaf_ret = get_return_for_leaf(iter, field);
        if (leaf_ret)
-               return print_graph_entry_leaf(iter, field, leaf_ret, s);
+               ret = print_graph_entry_leaf(iter, field, leaf_ret, s);
        else
-               return print_graph_entry_nested(iter, field, s, cpu);
+               ret = print_graph_entry_nested(iter, field, s, cpu);
  
+       if (data) {
+               /*
+                * If we failed to write our output, then we need to make
+                * note of it. Because we already consumed our entry.
+                */
+               if (s->full) {
+                       data->failed = 1;
+                       data->cpu = cpu;
+               } else
+                       data->failed = 0;
+       }
+       return ret;
  }
  
  static enum print_line_t
@@@ -810,7 -859,7 +859,7 @@@ print_graph_return(struct ftrace_graph_
  
        if (data) {
                int cpu = iter->cpu;
-               int *depth = &(per_cpu_ptr(data, cpu)->depth);
+               int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
  
                /*
                 * Comments display at + 1 to depth. This is the
@@@ -873,7 -922,7 +922,7 @@@ print_graph_comment(struct trace_seq *s
        int i;
  
        if (data)
-               depth = per_cpu_ptr(data, iter->cpu)->depth;
+               depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
  
        if (print_graph_prologue(iter, s, 0, 0))
                return TRACE_TYPE_PARTIAL_LINE;
  enum print_line_t
  print_graph_function(struct trace_iterator *iter)
  {
+       struct ftrace_graph_ent_entry *field;
+       struct fgraph_data *data = iter->private;
        struct trace_entry *entry = iter->ent;
        struct trace_seq *s = &iter->seq;
+       int cpu = iter->cpu;
+       int ret;
+       if (data && per_cpu_ptr(data->cpu_data, cpu)->ignore) {
+               per_cpu_ptr(data->cpu_data, cpu)->ignore = 0;
+               return TRACE_TYPE_HANDLED;
+       }
+       /*
+        * If the last output failed, there's a possibility we need
+        * to print out the missing entry which would never go out.
+        */
+       if (data && data->failed) {
+               field = &data->ent;
+               iter->cpu = data->cpu;
+               ret = print_graph_entry(field, s, iter);
+               if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
+                       per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
+                       ret = TRACE_TYPE_NO_CONSUME;
+               }
+               iter->cpu = cpu;
+               return ret;
+       }
  
        switch (entry->type) {
        case TRACE_GRAPH_ENT: {
                 * sizeof(struct ftrace_graph_ent_entry) is very small,
                 * it can be safely saved at the stack.
                 */
-               struct ftrace_graph_ent_entry *field, saved;
+               struct ftrace_graph_ent_entry saved;
                trace_assign_type(field, entry);
                saved = *field;
                return print_graph_entry(&saved, s, iter);
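
The two hunks above make the graph printer robust against a full output buffer: the entry has already been consumed from the ring buffer, so on failure it is stashed (data->failed/data->ent) and replayed on the next call, with the per-CPU ignore flag preventing a second copy once it does go out. A self-contained userspace sketch of that replay idea follows; out_buf, stream and print_one are invented names, and the per-CPU bookkeeping is collapsed into a single stream for brevity.

#include <stdio.h>
#include <string.h>

struct out_buf { char buf[32]; size_t len; int full; };

struct stream { const int *entries; size_t len, pos; };

struct state { int failed; int saved; };

/* render one entry into the bounded output buffer; flag overflow */
static int emit(struct out_buf *o, int entry)
{
        char tmp[16];
        int n = snprintf(tmp, sizeof(tmp), "entry %d\n", entry);

        if (o->len + (size_t)n >= sizeof(o->buf)) {
                o->full = 1;
                return -1;
        }
        memcpy(o->buf + o->len, tmp, (size_t)n);
        o->len += (size_t)n;
        return 0;
}

/*
 * Emit one line per call.  Reading an entry consumes it from the stream, so
 * if the output buffer overflows the entry is remembered and replayed on the
 * next call instead of being silently lost.
 */
static void print_one(struct state *st, struct stream *s, struct out_buf *o)
{
        int entry;

        if (st->failed) {
                if (emit(o, st->saved) == 0)
                        st->failed = 0;
                return;                 /* nothing new consumed this round */
        }
        if (s->pos >= s->len)
                return;
        entry = s->entries[s->pos++];   /* consumed from the stream */
        if (emit(o, entry) < 0) {
                st->saved = entry;
                st->failed = 1;
        }
}

int main(void)
{
        static const int log[] = { 1, 2, 3, 4 };
        struct stream s = { log, 4, 0 };
        struct state st = { 0, 0 };
        struct out_buf o = { "", 0, 0 };
        int i;

        for (i = 0; i < 8; i++) {
                print_one(&st, &s, &o);
                if (o.full) {           /* flush and reuse the buffer */
                        fwrite(o.buf, 1, o.len, stdout);
                        o.len = 0;
                        o.full = 0;
                }
        }
        fwrite(o.buf, 1, o.len, stdout);
        return 0;
}
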
@@@ -1030,31 -1104,54 +1104,54 @@@ static void print_graph_headers(struct 
  static void graph_trace_open(struct trace_iterator *iter)
  {
        /* pid and depth on the last trace processed */
-       struct fgraph_data *data = alloc_percpu(struct fgraph_data);
+       struct fgraph_data *data;
        int cpu;
  
+       iter->private = NULL;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
-               pr_warning("function graph tracer: not enough memory\n");
-       else
-               for_each_possible_cpu(cpu) {
-                       pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid);
-                       int *depth = &(per_cpu_ptr(data, cpu)->depth);
-                       *pid = -1;
-                       *depth = 0;
-               }
+               goto out_err;
+       data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+       if (!data->cpu_data)
+               goto out_err_free;
+       for_each_possible_cpu(cpu) {
+               pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
+               int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+               int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
+               *pid = -1;
+               *depth = 0;
+               *ignore = 0;
+       }
  
        iter->private = data;
+       return;
+  out_err_free:
+       kfree(data);
+  out_err:
+       pr_warning("function graph tracer: not enough memory\n");
  }
  
  static void graph_trace_close(struct trace_iterator *iter)
  {
-       free_percpu(iter->private);
+       struct fgraph_data *data = iter->private;
+       if (data) {
+               free_percpu(data->cpu_data);
+               kfree(data);
+       }
  }
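
A hedged kernel-style sketch of the allocation scheme graph_trace_open() switches to above: a small control structure kzalloc'd per iterator, its per-CPU part obtained with alloc_percpu(), initialised with for_each_possible_cpu(), and torn down in reverse order on close. The my_data/my_cpu_data names are invented; alloc_percpu(), per_cpu_ptr() and free_percpu() are the regular percpu API used by the diff itself.

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/types.h>

struct my_cpu_data {
        pid_t   last_pid;
        int     depth;
        int     ignore;
};

struct my_data {
        struct my_cpu_data      *cpu_data;
        int                     failed;
        int                     cpu;
};

static struct my_data *my_data_open(void)
{
        struct my_data *data;
        int cpu;

        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                return NULL;

        data->cpu_data = alloc_percpu(struct my_cpu_data);
        if (!data->cpu_data) {
                kfree(data);            /* unwind the partial allocation */
                return NULL;
        }

        for_each_possible_cpu(cpu) {
                struct my_cpu_data *d = per_cpu_ptr(data->cpu_data, cpu);

                d->last_pid = -1;       /* no PID seen on this CPU yet */
                d->depth = 0;
                d->ignore = 0;
        }
        return data;
}

static void my_data_close(struct my_data *data)
{
        if (data) {
                free_percpu(data->cpu_data);
                kfree(data);
        }
}
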
  
  static struct tracer graph_trace __read_mostly = {
        .name           = "function_graph",
        .open           = graph_trace_open,
+       .pipe_open      = graph_trace_open,
        .close          = graph_trace_close,
+       .pipe_close     = graph_trace_close,
        .wait_pipe      = poll_wait_pipe,
        .init           = graph_trace_init,
        .reset          = graph_trace_reset,
diff --combined mm/percpu.c
index 77c6f7994a46cc4add460e5f05ff055123b8a1a4,442010cc91c6c82eb8489e64d21500baa52b5911..626e43c99498d878281c20211b56afd361e93af2
@@@ -72,6 -72,7 +72,7 @@@
  #include <asm/cacheflush.h>
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
+ #include <asm/io.h>
  
  #define PCPU_SLOT_BASE_SHIFT          5       /* 1-31 shares the same slot */
  #define PCPU_DFL_MAP_ALLOC            16      /* start a map with 16 ents */
@@@ -151,7 -152,10 +152,10 @@@ static int pcpu_reserved_chunk_limit
   *
   * During allocation, pcpu_alloc_mutex is kept locked all the time and
   * pcpu_lock is grabbed and released as necessary.  All actual memory
-  * allocations are done using GFP_KERNEL with pcpu_lock released.
+  * allocations are done using GFP_KERNEL with pcpu_lock released.  In
+  * general, percpu memory can't be allocated with irq off but
+  * irqsave/restore are still used in alloc path so that it can be used
+  * from early init path - sched_init() specifically.
   *
   * Free path accesses and alters only the index data structures, so it
   * can be safely called from atomic context.  When memory needs to be
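
The added comment is the constraint that drives the rest of this patch: spin_lock_irq()/spin_unlock_irq() unconditionally re-enable interrupts on unlock, so they cannot be used on a path that may already run with IRQs disabled (early sched_init()), whereas spin_lock_irqsave() puts back whatever IRQ state the caller had. A minimal hedged sketch; demo_lock is an invented lock, the locking calls are the standard kernel API.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* safe regardless of the caller's IRQ state: that state is restored */
static void touch_safe(void)
{
        unsigned long flags;

        spin_lock_irqsave(&demo_lock, flags);
        /* ... critical section ... */
        spin_unlock_irqrestore(&demo_lock, flags);
}

/*
 * Only correct when IRQs are known to be enabled on entry: the unlock
 * turns them back on unconditionally, which would be wrong inside an
 * early-init or otherwise IRQ-disabled caller.
 */
static void touch_irqs_on_only(void)
{
        spin_lock_irq(&demo_lock);
        /* ... critical section ... */
        spin_unlock_irq(&demo_lock);
}
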
@@@ -350,63 -354,86 +354,86 @@@ static struct pcpu_chunk *pcpu_chunk_ad
  }
  
  /**
-  * pcpu_extend_area_map - extend area map for allocation
-  * @chunk: target chunk
+  * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+  * @chunk: chunk of interest
   *
-  * Extend area map of @chunk so that it can accomodate an allocation.
-  * A single allocation can split an area into three areas, so this
-  * function makes sure that @chunk->map has at least two extra slots.
+  * Determine whether area map of @chunk needs to be extended to
+  * accommodate a new allocation.
   *
   * CONTEXT:
-  * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
-  * if area map is extended.
+  * pcpu_lock.
   *
   * RETURNS:
-  * 0 if noop, 1 if successfully extended, -errno on failure.
+  * New target map allocation length if extension is necessary, 0
+  * otherwise.
   */
- static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
-       __releases(lock) __acquires(lock)
+ static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
  {
        int new_alloc;
-       int *new;
-       size_t size;
  
-       /* has enough? */
        if (chunk->map_alloc >= chunk->map_used + 2)
                return 0;
  
-       spin_unlock_irq(&pcpu_lock);
        new_alloc = PCPU_DFL_MAP_ALLOC;
        while (new_alloc < chunk->map_used + 2)
                new_alloc *= 2;
  
-       new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-       if (!new) {
-               spin_lock_irq(&pcpu_lock);
+       return new_alloc;
+ }
+
+ /**
+  * pcpu_extend_area_map - extend area map of a chunk
+  * @chunk: chunk of interest
+  * @new_alloc: new target allocation length of the area map
+  *
+  * Extend area map of @chunk to have @new_alloc entries.
+  *
+  * CONTEXT:
+  * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+  *
+  * RETURNS:
+  * 0 on success, -errno on failure.
+  */
+ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+ {
+       int *old = NULL, *new = NULL;
+       size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+       unsigned long flags;
+       new = pcpu_mem_alloc(new_size);
+       if (!new)
                return -ENOMEM;
-       }
  
-       /*
-        * Acquire pcpu_lock and switch to new area map.  Only free
-        * could have happened inbetween, so map_used couldn't have
-        * grown.
-        */
-       spin_lock_irq(&pcpu_lock);
-       BUG_ON(new_alloc < chunk->map_used + 2);
+       /* acquire pcpu_lock and switch to new area map */
+       spin_lock_irqsave(&pcpu_lock, flags);
+       if (new_alloc <= chunk->map_alloc)
+               goto out_unlock;
  
-       size = chunk->map_alloc * sizeof(chunk->map[0]);
-       memcpy(new, chunk->map, size);
+       old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+       memcpy(new, chunk->map, old_size);
  
        /*
         * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
         * one of the first chunks and still using static map.
         */
        if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-               pcpu_mem_free(chunk->map, size);
+               old = chunk->map;
  
        chunk->map_alloc = new_alloc;
        chunk->map = new;
+       new = NULL;
+ out_unlock:
+       spin_unlock_irqrestore(&pcpu_lock, flags);
+       /*
+        * pcpu_mem_free() might end up calling vfree() which uses
+        * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+        */
+       pcpu_mem_free(old, old_size);
+       pcpu_mem_free(new, new_size);
        return 0;
  }
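
The rewritten helpers above split "do we need to grow?" (answered under pcpu_lock) from the growth itself: memory is allocated with the lock dropped, the pointer swap happens under the lock with a recheck, and both the superseded old map and an unused new one are freed only after unlocking, since freeing may take IRQ-unsafe locks. A userspace sketch of that shape, with a pthread mutex standing in for pcpu_lock and malloc/free for pcpu_mem_alloc/pcpu_mem_free; it omits the first-chunk static-map special case.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

struct chunk {
        int *map;
        int map_alloc;                  /* slots allocated */
        int map_used;                   /* slots in use */
};

#define DFL_MAP_ALLOC 16

/* call with map_lock held: 0 if there is room, else the new target length */
static int need_to_extend(struct chunk *c)
{
        int new_alloc;

        if (c->map_alloc >= c->map_used + 2)
                return 0;
        new_alloc = DFL_MAP_ALLOC;
        while (new_alloc < c->map_used + 2)
                new_alloc *= 2;
        return new_alloc;
}

/* call with map_lock dropped: allocate outside, swap under the lock */
static int extend_map(struct chunk *c, int new_alloc)
{
        int *old = NULL, *new = malloc(new_alloc * sizeof(*new));
        size_t old_size = 0;

        if (!new)
                return -1;

        pthread_mutex_lock(&map_lock);
        if (new_alloc <= c->map_alloc)
                goto out_unlock;        /* someone else grew it meanwhile */

        old_size = c->map_alloc * sizeof(c->map[0]);
        memcpy(new, c->map, old_size);
        old = c->map;
        c->map = new;
        c->map_alloc = new_alloc;
        new = NULL;                     /* ownership passed to the chunk */

out_unlock:
        pthread_mutex_unlock(&map_lock);

        /* freeing can be "heavy", so it happens outside map_lock */
        free(old);
        free(new);
        return 0;
}

int main(void)
{
        struct chunk c = { calloc(DFL_MAP_ALLOC, sizeof(int)),
                           DFL_MAP_ALLOC, 15 };
        int new_alloc;

        pthread_mutex_lock(&map_lock);
        new_alloc = need_to_extend(&c);         /* 15 + 2 > 16: grow to 32 */
        pthread_mutex_unlock(&map_lock);

        if (new_alloc)
                extend_map(&c, new_alloc);
        free(c.map);
        return 0;
}
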
  
@@@ -886,10 -913,11 +913,10 @@@ static void pcpu_depopulate_chunk(struc
        int rs, re;
  
        /* quick path, check whether it's empty already */
 -      pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
 -              if (rs == page_start && re == page_end)
 -                      return;
 -              break;
 -      }
 +      rs = page_start;
 +      pcpu_next_unpop(chunk, &rs, &re, page_end);
 +      if (rs == page_start && re == page_end)
 +              return;
  
        /* immutable chunks can't be depopulated */
        WARN_ON(chunk->immutable);
@@@ -940,10 -968,11 +967,10 @@@ static int pcpu_populate_chunk(struct p
        int rs, re, rc;
  
        /* quick path, check whether all pages are already there */
 -      pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
 -              if (rs == page_start && re == page_end)
 -                      goto clear;
 -              break;
 -      }
 +      rs = page_start;
 +      pcpu_next_pop(chunk, &rs, &re, page_end);
 +      if (rs == page_start && re == page_end)
 +              goto clear;
  
        /* need to allocate and map pages, this chunk can't be immutable */
        WARN_ON(chunk->immutable);
@@@ -1043,7 -1072,8 +1070,8 @@@ static void *pcpu_alloc(size_t size, si
        static int warn_limit = 10;
        struct pcpu_chunk *chunk;
        const char *err;
-       int slot, off;
+       int slot, off, new_alloc;
+       unsigned long flags;
  
        if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
                WARN(true, "illegal size (%zu) or align (%zu) for "
        }
  
        mutex_lock(&pcpu_alloc_mutex);
-       spin_lock_irq(&pcpu_lock);
+       spin_lock_irqsave(&pcpu_lock, flags);
  
        /* serve reserved allocations from the reserved chunk if available */
        if (reserved && pcpu_reserved_chunk) {
                chunk = pcpu_reserved_chunk;
-               if (size > chunk->contig_hint ||
-                   pcpu_extend_area_map(chunk) < 0) {
-                       err = "failed to extend area map of reserved chunk";
+               if (size > chunk->contig_hint) {
+                       err = "alloc from reserved chunk failed";
                        goto fail_unlock;
                }
+               while ((new_alloc = pcpu_need_to_extend(chunk))) {
+                       spin_unlock_irqrestore(&pcpu_lock, flags);
+                       if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+                               err = "failed to extend area map of reserved chunk";
+                               goto fail_unlock_mutex;
+                       }
+                       spin_lock_irqsave(&pcpu_lock, flags);
+               }
                off = pcpu_alloc_area(chunk, size, align);
                if (off >= 0)
                        goto area_found;
                err = "alloc from reserved chunk failed";
                goto fail_unlock;
        }
@@@ -1076,14 -1117,20 +1115,20 @@@ restart
                        if (size > chunk->contig_hint)
                                continue;
  
-                       switch (pcpu_extend_area_map(chunk)) {
-                       case 0:
-                               break;
-                       case 1:
-                               goto restart;   /* pcpu_lock dropped, restart */
-                       default:
-                               err = "failed to extend area map";
-                               goto fail_unlock;
+                       new_alloc = pcpu_need_to_extend(chunk);
+                       if (new_alloc) {
+                               spin_unlock_irqrestore(&pcpu_lock, flags);
+                               if (pcpu_extend_area_map(chunk,
+                                                        new_alloc) < 0) {
+                                       err = "failed to extend area map";
+                                       goto fail_unlock_mutex;
+                               }
+                               spin_lock_irqsave(&pcpu_lock, flags);
+                               /*
+                                * pcpu_lock has been dropped, need to
+                                * restart cpu_slot list walking.
+                                */
+                               goto restart;
                        }
  
                        off = pcpu_alloc_area(chunk, size, align);
        }
  
        /* hmmm... no space left, create a new chunk */
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
  
        chunk = alloc_pcpu_chunk();
        if (!chunk) {
                goto fail_unlock_mutex;
        }
  
-       spin_lock_irq(&pcpu_lock);
+       spin_lock_irqsave(&pcpu_lock, flags);
        pcpu_chunk_relocate(chunk, -1);
        goto restart;
  
  area_found:
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
  
        /* populate, map and clear the area */
        if (pcpu_populate_chunk(chunk, off, size)) {
-               spin_lock_irq(&pcpu_lock);
+               spin_lock_irqsave(&pcpu_lock, flags);
                pcpu_free_area(chunk, off);
                err = "failed to populate";
                goto fail_unlock;
        return __addr_to_pcpu_ptr(chunk->base_addr + off);
  
  fail_unlock:
-       spin_unlock_irq(&pcpu_lock);
+       spin_unlock_irqrestore(&pcpu_lock, flags);
  fail_unlock_mutex:
        mutex_unlock(&pcpu_alloc_mutex);
        if (warn_limit) {
@@@ -1254,6 -1301,27 +1299,27 @@@ void free_percpu(void *ptr
  }
  EXPORT_SYMBOL_GPL(free_percpu);
  
+ /**
+  * per_cpu_ptr_to_phys - convert translated percpu address to physical address
+  * @addr: the address to be converted to physical address
+  *
+  * Given @addr which is a dereferenceable address obtained via one of
+  * percpu access macros, this function translates it into its physical
+  * address.  The caller is responsible for ensuring @addr stays valid
+  * until this function finishes.
+  *
+  * RETURNS:
+  * The physical address for @addr.
+  */
+ phys_addr_t per_cpu_ptr_to_phys(void *addr)
+ {
+       if ((unsigned long)addr < VMALLOC_START ||
+                       (unsigned long)addr >= VMALLOC_END)
+               return __pa(addr);
+       else
+               return page_to_phys(vmalloc_to_page(addr));
+ }
+
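
A short hedged usage sketch for the helper added above: translate each CPU's copy of a dynamically allocated percpu object into a physical address, e.g. to register it with firmware or a device. The my_stats type and the register_buffer_phys() consumer are hypothetical; alloc_percpu(), per_cpu_ptr() and per_cpu_ptr_to_phys() are the real API, and the helper works whether the percpu area sits in the linear mapping or in vmalloc space.

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/types.h>

struct my_stats { u64 calls; u64 errors; };

/* hypothetical consumer that needs physical addresses (e.g. a hypervisor) */
extern void register_buffer_phys(int cpu, phys_addr_t pa);

static int register_percpu_stats(void)
{
        struct my_stats *stats = alloc_percpu(struct my_stats);
        int cpu;

        if (!stats)
                return -ENOMEM;

        for_each_possible_cpu(cpu) {
                /* per_cpu_ptr() gives the kernel virtual address of this
                 * CPU's copy; per_cpu_ptr_to_phys() turns it into a
                 * physical address. */
                register_buffer_phys(cpu,
                        per_cpu_ptr_to_phys(per_cpu_ptr(stats, cpu)));
        }
        return 0;
}
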
  static inline size_t pcpu_calc_fc_sizes(size_t static_size,
                                        size_t reserved_size,
                                        ssize_t *dyn_sizep)