From: Paul E. McKenney
Date: Tue, 4 Aug 2015 15:40:58 +0000 (-0700)
Subject: Merge branches 'fixes.2015.07.22a' and 'initexp.2015.08.04a' into HEAD
X-Git-Tag: v4.3-rc1~142^2~1^2~1
X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=8ff4fbfd69a6c7b9598f8c1f2df34f89bac02c1a;p=linux-2.6-block.git

Merge branches 'fixes.2015.07.22a' and 'initexp.2015.08.04a' into HEAD

fixes.2015.07.22a: Miscellaneous fixes.
initexp.2015.08.04a: Initialization and expedited updates.
	(Single branch due to conflicts.)
---

8ff4fbfd69a6c7b9598f8c1f2df34f89bac02c1a
diff --cc kernel/rcu/tree.c
index 0a73d26357a2,439112e9d1b3..9f75f25cc5d9
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -3253,72 -3272,146 +3272,198 @@@ void cond_synchronize_rcu(unsigned lon
  }
  EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
  
 +/**
 + * get_state_synchronize_sched - Snapshot current RCU-sched state
 + *
 + * Returns a cookie that is used by a later call to cond_synchronize_sched()
 + * to determine whether or not a full grace period has elapsed in the
 + * meantime.
 + */
 +unsigned long get_state_synchronize_sched(void)
 +{
 +	/*
 +	 * Any prior manipulation of RCU-protected data must happen
 +	 * before the load from ->gpnum.
 +	 */
 +	smp_mb(); /* ^^^ */
 +
 +	/*
 +	 * Make sure this load happens before the purportedly
 +	 * time-consuming work between get_state_synchronize_sched()
 +	 * and cond_synchronize_sched().
 +	 */
 +	return smp_load_acquire(&rcu_sched_state.gpnum);
 +}
 +EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
 +
 +/**
 + * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
 + *
 + * @oldstate: return value from earlier call to get_state_synchronize_sched()
 + *
 + * If a full RCU-sched grace period has elapsed since the earlier call to
 + * get_state_synchronize_sched(), just return. Otherwise, invoke
 + * synchronize_sched() to wait for a full grace period.
 + *
 + * Yes, this function does not take counter wrap into account. But
 + * counter wrap is harmless. If the counter wraps, we have waited for
 + * more than 2 billion grace periods (and way more on a 64-bit system!),
 + * so waiting for one additional grace period should be just fine.
 + */
 +void cond_synchronize_sched(unsigned long oldstate)
 +{
 +	unsigned long newstate;
 +
 +	/*
 +	 * Ensure that this load happens before any RCU-destructive
 +	 * actions the caller might carry out after we return.
 +	 */
 +	newstate = smp_load_acquire(&rcu_sched_state.completed);
 +	if (ULONG_CMP_GE(oldstate, newstate))
 +		synchronize_sched();
 +}
 +EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 +
- static int synchronize_sched_expedited_cpu_stop(void *data)
+ /* Adjust sequence number for start of update-side operation. */
+ static void rcu_seq_start(unsigned long *sp)
+ {
+ 	WRITE_ONCE(*sp, *sp + 1);
+ 	smp_mb(); /* Ensure update-side operation after counter increment. */
+ 	WARN_ON_ONCE(!(*sp & 0x1));
+ }
+
+ /* Adjust sequence number for end of update-side operation. */
+ static void rcu_seq_end(unsigned long *sp)
+ {
+ 	smp_mb(); /* Ensure update-side operation before counter increment. */
+ 	WRITE_ONCE(*sp, *sp + 1);
+ 	WARN_ON_ONCE(*sp & 0x1);
+ }
+
+ /* Take a snapshot of the update side's sequence number. */
+ static unsigned long rcu_seq_snap(unsigned long *sp)
+ {
+ 	unsigned long s;
+
+ 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
+ 	s = (READ_ONCE(*sp) + 3) & ~0x1;
+ 	smp_mb(); /* Above access must not bleed into critical section. */
+ 	return s;
+ }
+
+ /*
+  * Given a snapshot from rcu_seq_snap(), determine whether or not a
+  * full update-side operation has occurred.
+  */
+ static bool rcu_seq_done(unsigned long *sp, unsigned long s)
+ {
+ 	return ULONG_CMP_GE(READ_ONCE(*sp), s);
+ }
+
+ /* Wrapper functions for expedited grace periods. */
+ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
+ {
+ 	rcu_seq_start(&rsp->expedited_sequence);
+ }
+ static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
+ {
+ 	rcu_seq_end(&rsp->expedited_sequence);
+ 	smp_mb(); /* Ensure that consecutive grace periods serialize. */
+ }
+ static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
+ {
+ 	return rcu_seq_snap(&rsp->expedited_sequence);
+ }
+ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
+ {
+ 	return rcu_seq_done(&rsp->expedited_sequence, s);
+ }
+
+ /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
+ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
+ 			       struct rcu_data *rdp,
+ 			       atomic_long_t *stat, unsigned long s)
  {
+ 	if (rcu_exp_gp_seq_done(rsp, s)) {
+ 		if (rnp)
+ 			mutex_unlock(&rnp->exp_funnel_mutex);
+ 		else if (rdp)
+ 			mutex_unlock(&rdp->exp_funnel_mutex);
+ 		/* Ensure test happens before caller kfree(). */
+ 		smp_mb__before_atomic(); /* ^^^ */
+ 		atomic_long_inc(stat);
+ 		return true;
+ 	}
+ 	return false;
+ }
+
+ /*
+  * Funnel-lock acquisition for expedited grace periods. Returns a
+  * pointer to the root rcu_node structure, or NULL if some other
+  * task did the expedited grace period for us.
+  */
+ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
+ {
+ 	struct rcu_data *rdp;
+ 	struct rcu_node *rnp0;
+ 	struct rcu_node *rnp1 = NULL;
+
  	/*
- 	 * There must be a full memory barrier on each affected CPU
- 	 * between the time that try_stop_cpus() is called and the
- 	 * time that it returns.
- 	 *
- 	 * In the current initial implementation of cpu_stop, the
- 	 * above condition is already met when the control reaches
- 	 * this point and the following smp_mb() is not strictly
- 	 * necessary. Do smp_mb() anyway for documentation and
- 	 * robustness against future implementation changes.
+ 	 * First try directly acquiring the root lock in order to reduce
+ 	 * latency in the common case where expedited grace periods are
+ 	 * rare. We check mutex_is_locked() to avoid pathological levels of
+ 	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
  	 */
- 	smp_mb(); /* See above comment block. */
+ 	rnp0 = rcu_get_root(rsp);
+ 	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
+ 		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
+ 			if (sync_exp_work_done(rsp, rnp0, NULL,
+ 					       &rsp->expedited_workdone0, s))
+ 				return NULL;
+ 			return rnp0;
+ 		}
+ 	}
+
+ 	/*
+ 	 * Each pass through the following loop works its way
+ 	 * up the rcu_node tree, returning if others have done the
+ 	 * work or otherwise falls through holding the root rnp's
+ 	 * ->exp_funnel_mutex. The mapping from CPU to rcu_node structure
+ 	 * can be inexact, as it is just promoting locality and is not
+ 	 * strictly needed for correctness.
+ 	 */
+ 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+ 	if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+ 		return NULL;
+ 	mutex_lock(&rdp->exp_funnel_mutex);
+ 	rnp0 = rdp->mynode;
+ 	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
+ 		if (sync_exp_work_done(rsp, rnp1, rdp,
+ 				       &rsp->expedited_workdone2, s))
+ 			return NULL;
+ 		mutex_lock(&rnp0->exp_funnel_mutex);
+ 		if (rnp1)
+ 			mutex_unlock(&rnp1->exp_funnel_mutex);
+ 		else
+ 			mutex_unlock(&rdp->exp_funnel_mutex);
+ 		rnp1 = rnp0;
+ 	}
+ 	if (sync_exp_work_done(rsp, rnp1, rdp,
+ 			       &rsp->expedited_workdone3, s))
+ 		return NULL;
+ 	return rnp1;
+ }
+
+ /* Invoked on each online non-idle CPU for expedited quiescent state. */
+ static int synchronize_sched_expedited_cpu_stop(void *data)
+ {
+ 	struct rcu_data *rdp = data;
+ 	struct rcu_state *rsp = rdp->rsp;
+
+ 	/* We are here: If we are last, do the wakeup. */
+ 	rdp->exp_done = true;
+ 	if (atomic_dec_and_test(&rsp->expedited_need_qs))
+ 		wake_up(&rsp->expedited_wq);
  	return 0;
  }
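The kernel-doc above describes how the new get_state_synchronize_sched()/cond_synchronize_sched() pair is meant to be used: take a cookie, do other work, then block only if no full RCU-sched grace period has elapsed in the meantime. A hypothetical caller might look like the sketch below; everything other than the two RCU-sched calls ("gp", "new_p", do_other_work(), and the update-side locking) is made up for illustration and is not part of this commit.

	/* Sketch only: hypothetical updater using the cookie-based API. */
	struct foo *old_p = gp;			/* "gp" protected by caller's update-side lock. */
	unsigned long cookie;

	rcu_assign_pointer(gp, new_p);		/* Publish the replacement. */
	cookie = get_state_synchronize_sched();	/* Snapshot grace-period state. */
	do_other_work();			/* Hopefully spans a full grace period. */
	cond_synchronize_sched(cookie);		/* Block only if it did not. */
	kfree(old_p);				/* Safe: pre-existing readers are done. */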
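The rcu_seq_start()/rcu_seq_end()/rcu_seq_snap()/rcu_seq_done() helpers implement an even/odd sequence counter: an odd value means an update is in progress, and the snapshot (s + 3) & ~0x1 is the first even value that guarantees a full update began and ended after the snapshot was taken. Below is a minimal user-space sketch of that arithmetic only; it is single-threaded, with plain accesses standing in for READ_ONCE()/WRITE_ONCE() and the memory barriers, so it is illustrative rather than the kernel code itself.

	#include <limits.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* Wrap-tolerant "a >= b", as in the kernel's ULONG_CMP_GE(). */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	static unsigned long seq;	/* Even: idle.  Odd: update in progress. */

	static void seq_start(void) { seq++; }	/* rcu_seq_start() sans barriers. */
	static void seq_end(void)   { seq++; }	/* rcu_seq_end() sans barriers. */

	/* rcu_seq_snap(): first even value implying a full post-snapshot update. */
	static unsigned long seq_snap(void)
	{
		return (seq + 3) & ~0x1UL;
	}

	/* rcu_seq_done(): has the update side caught up with snapshot s? */
	static bool seq_done(unsigned long s)
	{
		return ULONG_CMP_GE(seq, s);
	}

	int main(void)
	{
		unsigned long s;

		s = seq_snap();			/* seq == 0 (idle) -> s == 2. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=0 */
		seq_start();			/* seq == 1: update in progress. */
		seq_end();			/* seq == 2: one full update. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=1 */

		seq_start();			/* seq == 3: update in progress. */
		s = seq_snap();			/* In-flight update may have missed us -> s == 6. */
		seq_end();			/* seq == 4: not sufficient. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=0 */
		seq_start();
		seq_end();			/* seq == 6: a full later update. */
		printf("s=%lu done=%d\n", s, seq_done(s));	/* done=1 */
		return 0;
	}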