From: Paul E. McKenney
Date: Tue, 4 Aug 2015 15:40:58 +0000 (-0700)
Subject: Merge branches 'fixes.2015.07.22a' and 'initexp.2015.08.04a' into HEAD
X-Git-Tag: v4.3-rc1~142^2~1^2~1
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=8ff4fbfd69a6c7b9598f8c1f2df34f89bac02c1a;p=linux-2.6-block.git

Merge branches 'fixes.2015.07.22a' and 'initexp.2015.08.04a' into HEAD

fixes.2015.07.22a: Miscellaneous fixes.
initexp.2015.08.04a: Initialization and expedited updates.
	(Single branch due to conflicts.)
---

8ff4fbfd69a6c7b9598f8c1f2df34f89bac02c1a
diff --cc kernel/rcu/tree.c
index 0a73d26357a2,439112e9d1b3..9f75f25cc5d9
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -3253,72 -3272,146 +3272,198 @@@ void cond_synchronize_rcu(unsigned lon
  }
  EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
  
 +/**
 + * get_state_synchronize_sched - Snapshot current RCU-sched state
 + *
 + * Returns a cookie that is used by a later call to cond_synchronize_sched()
 + * to determine whether or not a full grace period has elapsed in the
 + * meantime.
 + */
 +unsigned long get_state_synchronize_sched(void)
 +{
 +	/*
 +	 * Any prior manipulation of RCU-protected data must happen
 +	 * before the load from ->gpnum.
 +	 */
 +	smp_mb(); /* ^^^ */
 +
 +	/*
 +	 * Make sure this load happens before the purportedly
 +	 * time-consuming work between get_state_synchronize_sched()
 +	 * and cond_synchronize_sched().
 +	 */
 +	return smp_load_acquire(&rcu_sched_state.gpnum);
 +}
 +EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
 +
 +/**
 + * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
 + *
 + * @oldstate: return value from earlier call to get_state_synchronize_sched()
 + *
 + * If a full RCU-sched grace period has elapsed since the earlier call to
 + * get_state_synchronize_sched(), just return. Otherwise, invoke
 + * synchronize_sched() to wait for a full grace period.
 + *
 + * Yes, this function does not take counter wrap into account. But
 + * counter wrap is harmless. If the counter wraps, we have waited for
 + * more than 2 billion grace periods (and way more on a 64-bit system!),
 + * so waiting for one additional grace period should be just fine.
 + */
 +void cond_synchronize_sched(unsigned long oldstate)
 +{
 +	unsigned long newstate;
 +
 +	/*
 +	 * Ensure that this load happens before any RCU-destructive
 +	 * actions the caller might carry out after we return.
 +	 */
 +	newstate = smp_load_acquire(&rcu_sched_state.completed);
 +	if (ULONG_CMP_GE(oldstate, newstate))
 +		synchronize_sched();
 +}
 +EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 +
- static int synchronize_sched_expedited_cpu_stop(void *data)
+ /* Adjust sequence number for start of update-side operation. */
+ static void rcu_seq_start(unsigned long *sp)
+ {
+ 	WRITE_ONCE(*sp, *sp + 1);
+ 	smp_mb(); /* Ensure update-side operation after counter increment. */
+ 	WARN_ON_ONCE(!(*sp & 0x1));
+ }
+
+ /* Adjust sequence number for end of update-side operation. */
+ static void rcu_seq_end(unsigned long *sp)
+ {
+ 	smp_mb(); /* Ensure update-side operation before counter increment. */
+ 	WRITE_ONCE(*sp, *sp + 1);
+ 	WARN_ON_ONCE(*sp & 0x1);
+ }
+
+ /* Take a snapshot of the update side's sequence number. */
+ static unsigned long rcu_seq_snap(unsigned long *sp)
+ {
+ 	unsigned long s;
+
+ 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
+ 	s = (READ_ONCE(*sp) + 3) & ~0x1;
+ 	smp_mb(); /* Above access must not bleed into critical section. */
+ 	return s;
+ }
+
+ /*
+  * Given a snapshot from rcu_seq_snap(), determine whether or not a
+  * full update-side operation has occurred.
+  */
+ static bool rcu_seq_done(unsigned long *sp, unsigned long s)
+ {
+ 	return ULONG_CMP_GE(READ_ONCE(*sp), s);
+ }
+
+ /* Wrapper functions for expedited grace periods. */
+ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
+ {
+ 	rcu_seq_start(&rsp->expedited_sequence);
+ }
+ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
+ {
+ 	rcu_seq_end(&rsp->expedited_sequence);
+ 	smp_mb(); /* Ensure that consecutive grace periods serialize. */
+ }
+ static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
+ {
+ 	return rcu_seq_snap(&rsp->expedited_sequence);
+ }
+ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
+ {
+ 	return rcu_seq_done(&rsp->expedited_sequence, s);
+ }
+
+ /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
+ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
+ 			       struct rcu_data *rdp,
+ 			       atomic_long_t *stat, unsigned long s)
  {
+ 	if (rcu_exp_gp_seq_done(rsp, s)) {
+ 		if (rnp)
+ 			mutex_unlock(&rnp->exp_funnel_mutex);
+ 		else if (rdp)
+ 			mutex_unlock(&rdp->exp_funnel_mutex);
+ 		/* Ensure test happens before caller kfree(). */
+ 		smp_mb__before_atomic(); /* ^^^ */
+ 		atomic_long_inc(stat);
+ 		return true;
+ 	}
+ 	return false;
+ }
+
+ /*
+  * Funnel-lock acquisition for expedited grace periods. Returns a
+  * pointer to the root rcu_node structure, or NULL if some other
+  * task did the expedited grace period for us.
+  */
+ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
+ {
+ 	struct rcu_data *rdp;
+ 	struct rcu_node *rnp0;
+ 	struct rcu_node *rnp1 = NULL;
+
  	/*
- 	 * There must be a full memory barrier on each affected CPU
- 	 * between the time that try_stop_cpus() is called and the
- 	 * time that it returns.
- 	 *
- 	 * In the current initial implementation of cpu_stop, the
- 	 * above condition is already met when the control reaches
- 	 * this point and the following smp_mb() is not strictly
- 	 * necessary. Do smp_mb() anyway for documentation and
- 	 * robustness against future implementation changes.
+ 	 * First try directly acquiring the root lock in order to reduce
+ 	 * latency in the common case where expedited grace periods are
+ 	 * rare. We check mutex_is_locked() to avoid pathological levels of
+ 	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
  	 */
- 	smp_mb(); /* See above comment block. */
+ 	rnp0 = rcu_get_root(rsp);
+ 	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
+ 		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
+ 			if (sync_exp_work_done(rsp, rnp0, NULL,
+ 					       &rsp->expedited_workdone0, s))
+ 				return NULL;
+ 			return rnp0;
+ 		}
+ 	}
+
+ 	/*
+ 	 * Each pass through the following loop works its way
+ 	 * up the rcu_node tree, returning if others have done the
+ 	 * work or otherwise falls through holding the root rnp's
+ 	 * ->exp_funnel_mutex. The mapping from CPU to rcu_node structure
+ 	 * can be inexact, as it is just promoting locality and is not
+ 	 * strictly needed for correctness.
+ 	 */
+ 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+ 	if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ 		return NULL;
+ 	mutex_lock(&rdp->exp_funnel_mutex);
+ 	rnp0 = rdp->mynode;
+ 	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
+ 		if (sync_exp_work_done(rsp, rnp1, rdp,
+ 				       &rsp->expedited_workdone2, s))
+ 			return NULL;
+ 		mutex_lock(&rnp0->exp_funnel_mutex);
+ 		if (rnp1)
+ 			mutex_unlock(&rnp1->exp_funnel_mutex);
+ 		else
+ 			mutex_unlock(&rdp->exp_funnel_mutex);
+ 		rnp1 = rnp0;
+ 	}
+ 	if (sync_exp_work_done(rsp, rnp1, rdp,
+ 			       &rsp->expedited_workdone3, s))
+ 		return NULL;
+ 	return rnp1;
+ }
+
+ /* Invoked on each online non-idle CPU for expedited quiescent state. */
+ static int synchronize_sched_expedited_cpu_stop(void *data)
+ {
+ 	struct rcu_data *rdp = data;
+ 	struct rcu_state *rsp = rdp->rsp;
+
+ 	/* We are here: If we are last, do the wakeup. */
+ 	rdp->exp_done = true;
+ 	if (atomic_dec_and_test(&rsp->expedited_need_qs))
+ 		wake_up(&rsp->expedited_wq);
  	return 0;
  }
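The kernel-doc above describes how the new get_state_synchronize_sched()/cond_synchronize_sched() pair is meant to be used: take a cookie, do other work, then block only if no full RCU-sched grace period has elapsed in the meantime. A hypothetical caller might look like the sketch below; everything other than the two RCU-sched calls ("gp", "new_p", do_other_work(), and the update-side locking) is made up for illustration and is not part of this commit.

	/* Sketch only: hypothetical updater using the cookie-based API. */
	struct foo *old_p = gp;			/* "gp" protected by caller's update-side lock. */
	unsigned long cookie;

	rcu_assign_pointer(gp, new_p);		/* Publish the replacement. */
	cookie = get_state_synchronize_sched();	/* Snapshot grace-period state. */
	do_other_work();			/* Hopefully spans a full grace period. */
	cond_synchronize_sched(cookie);		/* Block only if it did not. */
	kfree(old_p);				/* Safe: pre-existing readers are done. */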
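The rcu_seq_start()/rcu_seq_end()/rcu_seq_snap()/rcu_seq_done() helpers implement an even/odd sequence counter: an odd value means an update is in progress, and the snapshot (s + 3) & ~0x1 is the first even value that guarantees a full update began and ended after the snapshot was taken. Below is a minimal user-space sketch of that arithmetic only; it is single-threaded, with plain accesses standing in for READ_ONCE()/WRITE_ONCE() and the memory barriers, so it is illustrative rather than the kernel code itself.

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Wrap-tolerant "a >= b", as in the kernel's ULONG_CMP_GE(). */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	static unsigned long seq;	/* Even: idle.  Odd: update in progress. */

	static void seq_start(void) { seq++; }	/* rcu_seq_start() sans barriers. */
	static void seq_end(void)   { seq++; }	/* rcu_seq_end() sans barriers. */

	/* rcu_seq_snap(): first even value implying a full post-snapshot update. */
	static unsigned long seq_snap(void)
	{
		return (seq + 3) & ~0x1UL;
	}

	/* rcu_seq_done(): has the update side caught up with snapshot s? */
	static bool seq_done(unsigned long s)
	{
		return ULONG_CMP_GE(seq, s);
	}

	int main(void)
	{
		unsigned long s;

		s = seq_snap();			/* seq == 0 (idle) -> s == 2. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=0 */
		seq_start();			/* seq == 1: update in progress. */
		seq_end();			/* seq == 2: one full update. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=1 */

		seq_start();			/* seq == 3: update in progress. */
		s = seq_snap();			/* In-flight update may have missed us -> s == 6. */
		seq_end();			/* seq == 4: not sufficient. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=0 */
		seq_start();
		seq_end();			/* seq == 6: a full later update. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=1 */
		return 0;
	}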