Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Mar 2010 22:43:01 +0000 (14:43 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 13 Mar 2010 22:43:01 +0000 (14:43 -0800)
diff --combined arch/x86/kernel/cpu/mcheck/mce.c

index 28cba46bf32c5221afcfb2115237750d4da56dec,4442e9e898c24107bf67545ba941c9778d3bed31..bd58de4d7a29e451616df150ba59256e9619bee7
--- 1/arch/x86/kernel/cpu/mcheck/mce.c
--- 2/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@@ -46,6 -46,11 +46,11 @@@
   
   #include "mce-internal.h"
   
+ #define rcu_dereference_check_mce(p) \
+       rcu_dereference_check((p), \
+                             rcu_read_lock_sched_held() || \
+                             lockdep_is_held(&mce_read_mutex))
+ 
   #define CREATE_TRACE_POINTS
   #include <trace/events/mce.h>
   
@@@ -158,7 -163,7 +163,7 @@@ void mce_log(struct mce *mce
         mce->finished = 0;
         wmb();
         for (;;) {
-               entry = rcu_dereference(mcelog.next);
+               entry = rcu_dereference_check_mce(mcelog.next);
                 for (;;) {
                         /*
                          * When the buffer fills up discard new entries.
@@@ -1500,7 -1505,7 +1505,7 @@@ static ssize_t mce_read(struct file *fi
                 return -ENOMEM;
   
         mutex_lock(&mce_read_mutex);
-       next = rcu_dereference(mcelog.next);
+       next = rcu_dereference_check_mce(mcelog.next);
   
         /* Only supports full reads right now */
         if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
@@@ -1565,7 -1570,7 +1570,7 @@@ timeout
   static unsigned int mce_poll(struct file *file, poll_table *wait)
   {
         poll_wait(file, &mce_wait, wait);
-       if (rcu_dereference(mcelog.next))
+       if (rcu_dereference_check_mce(mcelog.next))
                 return POLLIN | POLLRDNORM;
         return 0;
   }
@@@ -2044,7 -2049,6 +2049,7 @@@ static __init void mce_init_banks(void
                 struct mce_bank *b = &mce_banks[i];
                 struct sysdev_attribute *a = &b->attr;
   
+ +              sysfs_attr_init(&a->attr);
                 a->attr.name    = b->attrname;
                 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
   
diff --combined include/linux/rcupdate.h

index a005cac5e3024aa61eca92a76aa7d1473d7c7df7,75921b83c0ab791b24bad0f7a74e3d831611b07e..3024050c82a12610222a1b5e7d1c40d44e339b5a
--- 1/include/linux/rcupdate.h
--- 2/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@@ -41,10 -41,6 +41,10 @@@
   #include <linux/lockdep.h>
   #include <linux/completion.h>
   
+ +#ifdef CONFIG_RCU_TORTURE_TEST
+ +extern int rcutorture_runnable; /* for sysctl */
+ +#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+ +
   /**
    * struct rcu_head - callback structure for use with RCU
    * @next: next update requests in a list
@@@ -101,6 -97,11 +101,11 @@@ extern struct lockdep_map rcu_sched_loc
   # define rcu_read_release_sched() \
                 lock_release(&rcu_sched_lock_map, 1, _THIS_IP_)
   
+ static inline int debug_lockdep_rcu_enabled(void)
+ {
+       return likely(rcu_scheduler_active && debug_locks);
+ }
+ 
   /**
    * rcu_read_lock_held - might we be in RCU read-side critical section?
    *
@@@ -108,12 -109,14 +113,14 @@@
    * an RCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
    * this assumes we are in an RCU read-side critical section unless it can
    * prove otherwise.
+  *
+  * Check rcu_scheduler_active to prevent false positives during boot.
    */
   static inline int rcu_read_lock_held(void)
   {
-       if (debug_locks)
-               return lock_is_held(&rcu_lock_map);
-       return 1;
+       if (!debug_lockdep_rcu_enabled())
+               return 1;
+       return lock_is_held(&rcu_lock_map);
   }
   
   /**
@@@ -123,12 -126,14 +130,14 @@@
    * an RCU-bh read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
    * this assumes we are in an RCU-bh read-side critical section unless it can
    * prove otherwise.
+  *
+  * Check rcu_scheduler_active to prevent false positives during boot.
    */
   static inline int rcu_read_lock_bh_held(void)
   {
-       if (debug_locks)
-               return lock_is_held(&rcu_bh_lock_map);
-       return 1;
+       if (!debug_lockdep_rcu_enabled())
+               return 1;
+       return lock_is_held(&rcu_bh_lock_map);
   }
   
   /**
@@@ -139,15 -144,26 +148,26 @@@
    * this assumes we are in an RCU-sched read-side critical section unless it
    * can prove otherwise.  Note that disabling of preemption (including
    * disabling irqs) counts as an RCU-sched read-side critical section.
+  *
+  * Check rcu_scheduler_active to prevent false positives during boot.
    */
+ #ifdef CONFIG_PREEMPT
   static inline int rcu_read_lock_sched_held(void)
   {
         int lockdep_opinion = 0;
   
+       if (!debug_lockdep_rcu_enabled())
+               return 1;
         if (debug_locks)
                 lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-       return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active;
+       return lockdep_opinion || preempt_count() != 0;
+ }
+ #else /* #ifdef CONFIG_PREEMPT */
+ static inline int rcu_read_lock_sched_held(void)
+ {
+       return 1;
   }
+ #endif /* #else #ifdef CONFIG_PREEMPT */
   
   #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
   
@@@ -168,10 -184,17 +188,17 @@@ static inline int rcu_read_lock_bh_held
         return 1;
   }
   
+ #ifdef CONFIG_PREEMPT
   static inline int rcu_read_lock_sched_held(void)
   {
-       return preempt_count() != 0 || !rcu_scheduler_active;
+       return !rcu_scheduler_active || preempt_count() != 0;
+ }
+ #else /* #ifdef CONFIG_PREEMPT */
+ static inline int rcu_read_lock_sched_held(void)
+ {
+       return 1;
   }
+ #endif /* #else #ifdef CONFIG_PREEMPT */
   
   #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
   
@@@ -188,7 -211,7 +215,7 @@@
    */
   #define rcu_dereference_check(p, c) \
         ({ \
-               if (debug_locks && !(c)) \
+               if (debug_lockdep_rcu_enabled() && !(c)) \
                         lockdep_rcu_dereference(__FILE__, __LINE__); \
                 rcu_dereference_raw(p); \
         })
diff --combined include/linux/sched.h

index 8d70ff802da28ac9084eb2ea23c794019cce28b2,a47af2064dcc77cd123b3f0dd0b2707d0fa9618b..dad7f668ebf70041f3897102a0ff13a1a456edad
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -97,7 -97,7 +97,7 @@@ struct sched_param 
   struct exec_domain;
   struct futex_pi_state;
   struct robust_list_head;
- -struct bio;
+ +struct bio_list;
   struct fs_struct;
   struct bts_context;
   struct perf_event_context;
@@@ -258,6 -258,10 +258,10 @@@ extern spinlock_t mmlist_lock
   
   struct task_struct;
   
+ #ifdef CONFIG_PROVE_RCU
+ extern int lockdep_tasklist_lock_is_held(void);
+ #endif /* #ifdef CONFIG_PROVE_RCU */
+ 
   extern void sched_init(void);
   extern void sched_init_smp(void);
   extern asmlinkage void schedule_tail(struct task_struct *prev);
@@@ -396,6 -400,60 +400,6 @@@ extern void arch_unmap_area_topdown(str
   static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
   #endif
   
- -#if USE_SPLIT_PTLOCKS
- -/*
- - * The mm counters are not protected by its page_table_lock,
- - * so must be incremented atomically.
- - */
- -#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
- -#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
- -#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
- -#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
- -#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
- -
- -#else  /* !USE_SPLIT_PTLOCKS */
- -/*
- - * The mm counters are protected by its page_table_lock,
- - * so can be incremented directly.
- - */
- -#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
- -#define get_mm_counter(mm, member) ((mm)->_##member)
- -#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
- -#define inc_mm_counter(mm, member) (mm)->_##member++
- -#define dec_mm_counter(mm, member) (mm)->_##member--
- -
- -#endif /* !USE_SPLIT_PTLOCKS */
- -
- -#define get_mm_rss(mm)                                        \
- -      (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
- -#define update_hiwater_rss(mm)        do {                    \
- -      unsigned long _rss = get_mm_rss(mm);            \
- -      if ((mm)->hiwater_rss < _rss)                   \
- -              (mm)->hiwater_rss = _rss;               \
- -} while (0)
- -#define update_hiwater_vm(mm) do {                    \
- -      if ((mm)->hiwater_vm < (mm)->total_vm)          \
- -              (mm)->hiwater_vm = (mm)->total_vm;      \
- -} while (0)
- -
- -static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
- -{
- -      return max(mm->hiwater_rss, get_mm_rss(mm));
- -}
- -
- -static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
- -                                       struct mm_struct *mm)
- -{
- -      unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
- -
- -      if (*maxrss < hiwater_rss)
- -              *maxrss = hiwater_rss;
- -}
- -
- -static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
- -{
- -      return max(mm->hiwater_vm, mm->total_vm);
- -}
   
   extern void set_dumpable(struct mm_struct *mm, int value);
   extern int get_dumpable(struct mm_struct *mm);
@@@ -1220,9 -1278,7 +1224,9 @@@ struct task_struct 
         struct plist_node pushable_tasks;
   
         struct mm_struct *mm, *active_mm;
- -
+ +#if defined(SPLIT_RSS_COUNTING)
+ +      struct task_rss_stat    rss_stat;
+ +#endif
   /* task state */
         int exit_state;
         int exit_code, exit_signal;
@@@ -1402,7 -1458,7 +1406,7 @@@
         void *journal_info;
   
   /* stacked block device info */
- -      struct bio *bio_list, **bio_tail;
+ +      struct bio_list *bio_list;
   
   /* VM state */
         struct reclaim_state *reclaim_state;
@@@ -1473,7 -1529,7 +1477,7 @@@
   
         struct list_head        *scm_work_list;
   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
- -      /* Index of current stored adress in ret_stack */
+ +      /* Index of current stored address in ret_stack */
         int curr_ret_stack;
         /* Stack of return addresses for return function tracing */
         struct ftrace_ret_stack *ret_stack;
@@@ -2391,7 -2447,9 +2395,7 @@@ void thread_group_cputimer(struct task_
   
   static inline void thread_group_cputime_init(struct signal_struct *sig)
   {
- -      sig->cputimer.cputime = INIT_CPUTIME;
         spin_lock_init(&sig->cputimer.lock);
- -      sig->cputimer.running = 0;
   }
   
   static inline void thread_group_cputime_free(struct signal_struct *sig)
diff --combined kernel/exit.c

index ce1e48c2d93d3dbae4769ecc518066df383f0310,fed3a4db6f04ba9b6b77fa6db0073e3197d90de6..cce59cb5ee6aececf1b663d39a1de3fb15b4b426
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -87,7 -87,7 +87,7 @@@ static void __exit_signal(struct task_s
   
         sighand = rcu_dereference_check(tsk->sighand,
                                         rcu_read_lock_held() ||
-                                       lockdep_is_held(&tasklist_lock));
+                                       lockdep_tasklist_lock_is_held());
         spin_lock(&sighand->siglock);
   
         posix_cpu_timers_exit(tsk);
@@@ -952,8 -952,7 +952,8 @@@ NORET_TYPE void do_exit(long code
                                 preempt_count());
   
         acct_update_integrals(tsk);
- -
+ +      /* sync mm's RSS info before statistics gathering */
+ +      sync_mm_rss(tsk, tsk->mm);
         group_dead = atomic_dec_and_test(&tsk->signal->live);
         if (group_dead) {
                 hrtimer_cancel(&tsk->signal->real_timer);
@@@ -1189,7 -1188,7 +1189,7 @@@ static int wait_task_zombie(struct wait
   
         if (unlikely(wo->wo_flags & WNOWAIT)) {
                 int exit_code = p->exit_code;
- -              int why, status;
+ +              int why;
   
                 get_task_struct(p);
                 read_unlock(&tasklist_lock);
diff --combined kernel/fork.c

index 1beb6c303c416051e357e7e22637e29bffffcacc,8691c540a4703763ab3ba04ade5daef422f3270f..4799c5f0e6d089a6287b7c7742a34f37146932c1
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -86,7 -86,14 +86,14 @@@ int max_threads;            /* tunable limit on n
   DEFINE_PER_CPU(unsigned long, process_counts) = 0;
   
   __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
- EXPORT_SYMBOL_GPL(tasklist_lock);
+ 
+ #ifdef CONFIG_PROVE_RCU
+ int lockdep_tasklist_lock_is_held(void)
+ {
+       return lockdep_is_held(&tasklist_lock);
+ }
+ EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
+ #endif /* #ifdef CONFIG_PROVE_RCU */
   
   int nr_processes(void)
   {
@@@ -329,17 -336,15 +336,17 @@@ static int dup_mmap(struct mm_struct *m
                 if (!tmp)
                         goto fail_nomem;
                 *tmp = *mpnt;
+ +              INIT_LIST_HEAD(&tmp->anon_vma_chain);
                 pol = mpol_dup(vma_policy(mpnt));
                 retval = PTR_ERR(pol);
                 if (IS_ERR(pol))
                         goto fail_nomem_policy;
                 vma_set_policy(tmp, pol);
+ +              if (anon_vma_fork(tmp, mpnt))
+ +                      goto fail_nomem_anon_vma_fork;
                 tmp->vm_flags &= ~VM_LOCKED;
                 tmp->vm_mm = mm;
                 tmp->vm_next = NULL;
- -              anon_vma_link(tmp);
                 file = tmp->vm_file;
                 if (file) {
                         struct inode *inode = file->f_path.dentry->d_inode;
@@@ -394,8 -399,6 +401,8 @@@ out
         flush_tlb_mm(oldmm);
         up_write(&oldmm->mmap_sem);
         return retval;
+ +fail_nomem_anon_vma_fork:
+ +      mpol_put(pol);
   fail_nomem_policy:
         kmem_cache_free(vm_area_cachep, tmp);
   fail_nomem:
@@@ -459,7 -462,8 +466,7 @@@ static struct mm_struct * mm_init(struc
                 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
         mm->core_state = NULL;
         mm->nr_ptes = 0;
- -      set_mm_counter(mm, file_rss, 0);
- -      set_mm_counter(mm, anon_rss, 0);
+ +      memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
         spin_lock_init(&mm->page_table_lock);
         mm->free_area_cache = TASK_UNMAPPED_BASE;
         mm->cached_hole_size = ~0UL;
@@@ -828,14 -832,23 +835,14 @@@ void __cleanup_sighand(struct sighand_s
    */
   static void posix_cpu_timers_init_group(struct signal_struct *sig)
   {
+ +      unsigned long cpu_limit;
+ +
         /* Thread group counters. */
         thread_group_cputime_init(sig);
   
- -      /* Expiration times and increments. */
- -      sig->it[CPUCLOCK_PROF].expires = cputime_zero;
- -      sig->it[CPUCLOCK_PROF].incr = cputime_zero;
- -      sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
- -      sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
- -
- -      /* Cached expiration times. */
- -      sig->cputime_expires.prof_exp = cputime_zero;
- -      sig->cputime_expires.virt_exp = cputime_zero;
- -      sig->cputime_expires.sched_exp = 0;
- -
- -      if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
- -              sig->cputime_expires.prof_exp =
- -                      secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+ +      cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+ +      if (cpu_limit != RLIM_INFINITY) {
+ +              sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
                 sig->cputimer.running = 1;
         }
   
@@@ -852,7 -865,7 +859,7 @@@ static int copy_signal(unsigned long cl
         if (clone_flags & CLONE_THREAD)
                 return 0;
   
- -      sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+ +      sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
         tsk->signal = sig;
         if (!sig)
                 return -ENOMEM;
@@@ -860,21 -873,46 +867,21 @@@
         atomic_set(&sig->count, 1);
         atomic_set(&sig->live, 1);
         init_waitqueue_head(&sig->wait_chldexit);
- -      sig->flags = 0;
         if (clone_flags & CLONE_NEWPID)
                 sig->flags |= SIGNAL_UNKILLABLE;
- -      sig->group_exit_code = 0;
- -      sig->group_exit_task = NULL;
- -      sig->group_stop_count = 0;
         sig->curr_target = tsk;
         init_sigpending(&sig->shared_pending);
         INIT_LIST_HEAD(&sig->posix_timers);
   
         hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- -      sig->it_real_incr.tv64 = 0;
         sig->real_timer.function = it_real_fn;
   
- -      sig->leader = 0;        /* session leadership doesn't inherit */
- -      sig->tty_old_pgrp = NULL;
- -      sig->tty = NULL;
- -
- -      sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
- -      sig->gtime = cputime_zero;
- -      sig->cgtime = cputime_zero;
- -#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- -      sig->prev_utime = sig->prev_stime = cputime_zero;
- -#endif
- -      sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
- -      sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
- -      sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
- -      sig->maxrss = sig->cmaxrss = 0;
- -      task_io_accounting_init(&sig->ioac);
- -      sig->sum_sched_runtime = 0;
- -      taskstats_tgid_init(sig);
- -
         task_lock(current->group_leader);
         memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
         task_unlock(current->group_leader);
   
         posix_cpu_timers_init_group(sig);
   
- -      acct_init_pacct(&sig->pacct);
- -
         tty_audit_fork(sig);
   
         sig->oom_adj = current->signal->oom_adj;
@@@ -1003,7 -1041,7 +1010,7 @@@ static struct task_struct *copy_process
   #endif
         retval = -EAGAIN;
         if (atomic_read(&p->real_cred->user->processes) >=
- -                      p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+ +                      task_rlimit(p, RLIMIT_NPROC)) {
                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
                     p->real_cred->user != INIT_USER)
                         goto bad_fork_free;
diff --combined kernel/pid.c

index 86b296943e5f274c2b4965a73275661a4c6c727e,b6064405f367e6cde3c909a559a6fdb324801ce1..aebb30d9c233df40876afa35ef21cc5c306590a1
--- 1/kernel/pid.c
--- 2/kernel/pid.c
+++ b/kernel/pid.c
@@@ -367,7 -367,9 +367,9 @@@ struct task_struct *pid_task(struct pi
         struct task_struct *result = NULL;
         if (pid) {
                 struct hlist_node *first;
-               first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock));
+               first = rcu_dereference_check(pid->tasks[type].first,
+                                             rcu_read_lock_held() ||
+                                             lockdep_tasklist_lock_is_held());
                 if (first)
                         result = hlist_entry(first, struct task_struct, pids[(type)].node);
         }
@@@ -376,7 -378,7 +378,7 @@@
   EXPORT_SYMBOL(pid_task);
   
   /*
- - * Must be called under rcu_read_lock() or with tasklist_lock read-held.
+ + * Must be called under rcu_read_lock().
    */
   struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
   {
diff --combined kernel/trace/ftrace.c

index bb53edbb5c8c7fc7a8f4a4f2db423aa1512b2a43,8c5adc0e5db3f2199fa54cd9b422bb65a163d940..d9062f5cc0c01e598670d651fba80b87ccb68a57
--- 1/kernel/trace/ftrace.c
--- 2/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@@ -27,6 -27,7 +27,7 @@@
   #include <linux/ctype.h>
   #include <linux/list.h>
   #include <linux/hash.h>
+ #include <linux/rcupdate.h>
   
   #include <trace/events/sched.h>
   
@@@ -84,18 -85,26 +85,22 @@@ ftrace_func_t ftrace_trace_function __r
   ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
   ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
   
- -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- -static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
- -#endif
- -
+ /*
+  * Traverse the ftrace_list, invoking all entries.  The reason that we
+  * can use rcu_dereference_raw() is that elements removed from this list
+  * are simply leaked, so there is no need to interact with a grace-period
+  * mechanism.  The rcu_dereference_raw() calls are needed to handle
+  * concurrent insertions into the ftrace_list.
+  *
+  * Silly Alpha and silly pointer-speculation compiler optimizations!
+  */
   static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
   {
-       struct ftrace_ops *op = ftrace_list;
- 
-       /* in case someone actually ports this to alpha! */
-       read_barrier_depends();
+       struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
   
         while (op != &ftrace_list_end) {
-               /* silly alpha */
-               read_barrier_depends();
                 op->func(ip, parent_ip);
-               op = op->next;
+               op = rcu_dereference_raw(op->next); /*see above*/
         };
   }
   
@@@ -150,8 -159,7 +155,7 @@@ static int __register_ftrace_function(s
          * the ops->next pointer is valid before another CPU sees
          * the ops pointer included into the ftrace_list.
          */
-       smp_wmb();
-       ftrace_list = ops;
+       rcu_assign_pointer(ftrace_list, ops);
   
         if (ftrace_enabled) {
                 ftrace_func_t func;
@@@ -2272,8 -2280,6 +2276,8 @@@ __setup("ftrace_filter=", set_ftrace_fi
   
   #ifdef CONFIG_FUNCTION_GRAPH_TRACER
   static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
+ +static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
+ +
   static int __init set_graph_function(char *str)
   {
         strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@@ -3349,7 -3355,6 +3353,7 @@@ void ftrace_graph_init_task(struct task
   {
         /* Make sure we do not use the parent ret_stack */
         t->ret_stack = NULL;
+ +      t->curr_ret_stack = -1;
   
         if (ftrace_graph_active) {
                 struct ftrace_ret_stack *ret_stack;
@@@ -3359,6 -3364,7 +3363,6 @@@
                                 GFP_KERNEL);
                 if (!ret_stack)
                         return;
- -              t->curr_ret_stack = -1;
                 atomic_set(&t->tracing_graph_pause, 0);
                 atomic_set(&t->trace_overrun, 0);
                 t->ftrace_timestamp = 0;
diff --combined mm/mempolicy.c

index bda230e52acd94b0475640c4fd6b97895c6d3b9a,3cec080faa23780c7ca457153272bef79b3a58e4..643f66e101878015cbea8b3dc9ac1eb948d2d702
--- 1/mm/mempolicy.c
--- 2/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@@ -563,50 -563,24 +563,50 @@@ static int policy_vma(struct vm_area_st
   }
   
   /* Step 2: apply policy to a range and do splits. */
- -static int mbind_range(struct vm_area_struct *vma, unsigned long start,
- -                     unsigned long end, struct mempolicy *new)
+ +static int mbind_range(struct mm_struct *mm, unsigned long start,
+ +                     unsigned long end, struct mempolicy *new_pol)
   {
         struct vm_area_struct *next;
- -      int err;
+ +      struct vm_area_struct *prev;
+ +      struct vm_area_struct *vma;
+ +      int err = 0;
+ +      pgoff_t pgoff;
+ +      unsigned long vmstart;
+ +      unsigned long vmend;
   
- -      err = 0;
- -      for (; vma && vma->vm_start < end; vma = next) {
+ +      vma = find_vma_prev(mm, start, &prev);
+ +      if (!vma || vma->vm_start > start)
+ +              return -EFAULT;
+ +
+ +      for (; vma && vma->vm_start < end; prev = vma, vma = next) {
                 next = vma->vm_next;
- -              if (vma->vm_start < start)
- -                      err = split_vma(vma->vm_mm, vma, start, 1);
- -              if (!err && vma->vm_end > end)
- -                      err = split_vma(vma->vm_mm, vma, end, 0);
- -              if (!err)
- -                      err = policy_vma(vma, new);
+ +              vmstart = max(start, vma->vm_start);
+ +              vmend   = min(end, vma->vm_end);
+ +
+ +              pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ +              prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
+ +                                vma->anon_vma, vma->vm_file, pgoff, new_pol);
+ +              if (prev) {
+ +                      vma = prev;
+ +                      next = vma->vm_next;
+ +                      continue;
+ +              }
+ +              if (vma->vm_start != vmstart) {
+ +                      err = split_vma(vma->vm_mm, vma, vmstart, 1);
+ +                      if (err)
+ +                              goto out;
+ +              }
+ +              if (vma->vm_end != vmend) {
+ +                      err = split_vma(vma->vm_mm, vma, vmend, 0);
+ +                      if (err)
+ +                              goto out;
+ +              }
+ +              err = policy_vma(vma, new_pol);
                 if (err)
- -                      break;
+ +                      goto out;
         }
+ +
+ + out:
         return err;
   }
   
@@@ -888,36 -862,36 +888,36 @@@ int do_migrate_pages(struct mm_struct *
         if (err)
                 goto out;
   
- -/*
- - * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
- - * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
- - * bit in 'tmp', and return that <source, dest> pair for migration.
- - * The pair of nodemasks 'to' and 'from' define the map.
- - *
- - * If no pair of bits is found that way, fallback to picking some
- - * pair of 'source' and 'dest' bits that are not the same.  If the
- - * 'source' and 'dest' bits are the same, this represents a node
- - * that will be migrating to itself, so no pages need move.
- - *
- - * If no bits are left in 'tmp', or if all remaining bits left
- - * in 'tmp' correspond to the same bit in 'to', return false
- - * (nothing left to migrate).
- - *
- - * This lets us pick a pair of nodes to migrate between, such that
- - * if possible the dest node is not already occupied by some other
- - * source node, minimizing the risk of overloading the memory on a
- - * node that would happen if we migrated incoming memory to a node
- - * before migrating outgoing memory source that same node.
- - *
- - * A single scan of tmp is sufficient.  As we go, we remember the
- - * most recent <s, d> pair that moved (s != d).  If we find a pair
- - * that not only moved, but what's better, moved to an empty slot
- - * (d is not set in tmp), then we break out then, with that pair.
- - * Otherwise when we finish scannng from_tmp, we at least have the
- - * most recent <s, d> pair that moved.  If we get all the way through
- - * the scan of tmp without finding any node that moved, much less
- - * moved to an empty node, then there is nothing left worth migrating.
- - */
+ +      /*
+ +       * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
+ +       * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
+ +       * bit in 'tmp', and return that <source, dest> pair for migration.
+ +       * The pair of nodemasks 'to' and 'from' define the map.
+ +       *
+ +       * If no pair of bits is found that way, fallback to picking some
+ +       * pair of 'source' and 'dest' bits that are not the same.  If the
+ +       * 'source' and 'dest' bits are the same, this represents a node
+ +       * that will be migrating to itself, so no pages need move.
+ +       *
+ +       * If no bits are left in 'tmp', or if all remaining bits left
+ +       * in 'tmp' correspond to the same bit in 'to', return false
+ +       * (nothing left to migrate).
+ +       *
+ +       * This lets us pick a pair of nodes to migrate between, such that
+ +       * if possible the dest node is not already occupied by some other
+ +       * source node, minimizing the risk of overloading the memory on a
+ +       * node that would happen if we migrated incoming memory to a node
+ +       * before migrating outgoing memory source that same node.
+ +       *
+ +       * A single scan of tmp is sufficient.  As we go, we remember the
+ +       * most recent <s, d> pair that moved (s != d).  If we find a pair
+ +       * that not only moved, but what's better, moved to an empty slot
+ +       * (d is not set in tmp), then we break out then, with that pair.
+ +       * Otherwise when we finish scannng from_tmp, we at least have the
+ +       * most recent <s, d> pair that moved.  If we get all the way through
+ +       * the scan of tmp without finding any node that moved, much less
+ +       * moved to an empty node, then there is nothing left worth migrating.
+ +       */
   
         tmp = *from_nodes;
         while (!nodes_empty(tmp)) {
@@@ -1073,7 -1047,7 +1073,7 @@@ static long do_mbind(unsigned long star
         if (!IS_ERR(vma)) {
                 int nr_failed = 0;
   
- -              err = mbind_range(vma, start, end, new);
+ +              err = mbind_range(mm, start, end, new);
   
                 if (!list_empty(&pagelist))
                         nr_failed = migrate_pages(&pagelist, new_vma_page,
@@@ -1756,10 -1730,12 +1756,12 @@@ struct mempolicy *__mpol_dup(struct mem
   
         if (!new)
                 return ERR_PTR(-ENOMEM);
+       rcu_read_lock();
         if (current_cpuset_is_being_rebound()) {
                 nodemask_t mems = cpuset_mems_allowed(current);
                 mpol_rebind_policy(old, &mems);
         }
+       rcu_read_unlock();
         *new = *old;
         atomic_set(&new->refcnt, 1);
         return new;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Mar 2010 22:43:01 +0000 (14:43 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 13 Mar 2010 22:43:01 +0000 (14:43 -0800)
		1	2
arch/x86/kernel/cpu/mcheck/mce.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/rcupdate.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/pid.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/ftrace.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mempolicy.c	patch \|	diff1 \|	diff2 \|	blob \| history