/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Task-based RCU implementations.
 *
 * Copyright (C) 2020 Paul E. McKenney
 */

////////////////////////////////////////////////////////////////////////
//
// Generic data structures.

struct rcu_tasks;
typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);

/**
 * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
 * @cbs_head: Head of callback list.
 * @cbs_tail: Tail pointer for callback list.
 * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
 * @cbs_lock: Lock protecting callback list.
 * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
 * @gp_func: This flavor's grace-period-wait function.
 * @call_func: This flavor's call_rcu()-equivalent function.
 */
struct rcu_tasks {
	struct rcu_head *cbs_head;
	struct rcu_head **cbs_tail;
	struct wait_queue_head cbs_wq;
	raw_spinlock_t cbs_lock;
	struct task_struct *kthread_ptr;
	rcu_tasks_gp_func_t gp_func;
	call_rcu_func_t call_func;
};

#define DEFINE_RCU_TASKS(name, gp, call)				\
static struct rcu_tasks name =						\
{									\
	.cbs_tail = &name.cbs_head,					\
	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(name.cbs_wq),		\
	.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(name.cbs_lock),		\
	.gp_func = gp,							\
	.call_func = call,						\
}

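/*
 * For illustration: expanding DEFINE_RCU_TASKS(rcu_tasks,
 * rcu_tasks_wait_gp, call_rcu_tasks), as is done for the flavors later in
 * this file, yields approximately the following static instance:
 *
 *	static struct rcu_tasks rcu_tasks = {
 *		.cbs_tail  = &rcu_tasks.cbs_head,
 *		.cbs_wq    = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_tasks.cbs_wq),
 *		.cbs_lock  = __RAW_SPIN_LOCK_UNLOCKED(rcu_tasks.cbs_lock),
 *		.gp_func   = rcu_tasks_wait_gp,
 *		.call_func = call_rcu_tasks,
 *	};
 */
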
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);

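/*
 * Because this file is #included from kernel/rcu/update.c, the parameter
 * above is normally exposed as rcupdate.rcu_task_stall_timeout.  For
 * example, to set the stall timeout to roughly 30 seconds (the value is
 * in jiffies, so this assumes HZ=1000), one would typically boot with:
 *
 *	rcupdate.rcu_task_stall_timeout=30000
 *
 * or adjust it at runtime via
 *	/sys/module/rcupdate/parameters/rcu_task_stall_timeout
 */
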
////////////////////////////////////////////////////////////////////////
//
// Generic code.

// Enqueue a callback for the specified flavor of Tasks RCU.
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
				   struct rcu_tasks *rtp)
{
	unsigned long flags;
	bool needwake;

	rhp->next = NULL;
	rhp->func = func;
	raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
	needwake = !rtp->cbs_head;
	WRITE_ONCE(*rtp->cbs_tail, rhp);
	rtp->cbs_tail = &rhp->next;
	raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
	/* We can't create the thread unless interrupts are enabled. */
	if (needwake && READ_ONCE(rtp->kthread_ptr))
		wake_up(&rtp->cbs_wq);
}
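
/*
 * Note on the queueing scheme above: an empty list has ->cbs_head == NULL
 * and ->cbs_tail == &->cbs_head, so the first enqueue stores the new
 * callback through ->cbs_tail directly into ->cbs_head.  That is also why
 * "needwake" is computed from ->cbs_head being NULL before the store: only
 * the transition from empty to non-empty needs to wake the kthread.
 */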

// Wait for a grace period for the specified flavor of Tasks RCU.
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
	/* Complain if the scheduler has not started. */
	RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
			 "synchronize_rcu_tasks called too soon");

	/* Wait for the grace period. */
	wait_rcu_gp(rtp->call_func);
}

/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
	unsigned long flags;
	struct rcu_head *list;
	struct rcu_head *next;
	struct rcu_tasks *rtp = arg;

	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
	housekeeping_affine(current, HK_FLAG_RCU);
	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!

	/*
	 * Each pass through the following loop makes one check for
	 * newly arrived callbacks, and, if there are some, waits for
	 * one RCU-tasks grace period and then invokes the callbacks.
	 * This loop is terminated by the system going down.  ;-)
	 */
	for (;;) {

		/* Pick up any new callbacks. */
		raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
		list = rtp->cbs_head;
		rtp->cbs_head = NULL;
		rtp->cbs_tail = &rtp->cbs_head;
		raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);

		/* If there were none, wait a bit and start over. */
		if (!list) {
			wait_event_interruptible(rtp->cbs_wq,
						 READ_ONCE(rtp->cbs_head));
			if (!rtp->cbs_head) {
				WARN_ON(signal_pending(current));
				schedule_timeout_interruptible(HZ/10);
			}
			continue;
		}

		// Wait for one grace period.
		rtp->gp_func(rtp);

		/* Invoke the callbacks. */
		while (list) {
			next = list->next;
			local_bh_disable();
			list->func(list);
			local_bh_enable();
			list = next;
			cond_resched();
		}
		/* Paranoid sleep to keep this from entering a tight loop. */
		schedule_timeout_uninterruptible(HZ/10);
	}
}

/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
{
	struct task_struct *t;

	t = kthread_run(rcu_tasks_kthread, rtp, "rcu_tasks_kthread");
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__))
		return;
	smp_mb(); /* Ensure others see full kthread. */
}

/* Do the srcu_read_lock() for the synchronize_srcu() in rcu_tasks_wait_gp(). */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
	preempt_disable();
	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
	preempt_enable();
}

/* Do the srcu_read_unlock() for the synchronize_srcu() in rcu_tasks_wait_gp(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
{
	preempt_disable();
	__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
	preempt_enable();
}

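/*
 * Usage sketch (hedged; the actual call sites live in the task-exit path,
 * outside this file): the exit code brackets its final teardown region
 * with these two hooks so that the synchronize_srcu(&tasks_rcu_exit_srcu)
 * in rcu_tasks_wait_gp() below can wait for tasks that are mid-exit:
 *
 *	exit_tasks_rcu_start();
 *	... task-exit processing that Tasks RCU must not ignore ...
 *	exit_tasks_rcu_finish();
 */
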
#ifndef CONFIG_TINY_RCU

/*
 * Print any non-default Tasks RCU settings.
 */
static void __init rcu_tasks_bootup_oddness(void)
{
#ifdef CONFIG_TASKS_RCU
	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
	else
		pr_info("\tTasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RUDE_RCU
	pr_info("\tRude variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
}

#endif /* #ifndef CONFIG_TINY_RCU */

#ifdef CONFIG_TASKS_RCU

////////////////////////////////////////////////////////////////////////
//
// Simple variant of RCU whose quiescent states are voluntary context
// switch, cond_resched_rcu_qs(), user-space execution, and idle.
// As such, grace periods can take one good long time.  There are no
// read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
// because this implementation is intended to get the system into a safe
// state for some of the manipulations involved in tracing and the like.
// Finally, this implementation does not support high call_rcu_tasks()
// rates from multiple CPUs.  If this is required, per-CPU callback lists
// will be needed.

/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
			       bool needreport, bool *firstreport)
{
	int cpu;

	if (!READ_ONCE(t->rcu_tasks_holdout) ||
	    t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
	    !READ_ONCE(t->on_rq) ||
	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
		WRITE_ONCE(t->rcu_tasks_holdout, false);
		list_del_init(&t->rcu_tasks_holdout_list);
		put_task_struct(t);
		return;
	}
	rcu_request_urgent_qs_task(t);
	if (!needreport)
		return;
	if (*firstreport) {
		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
		*firstreport = false;
	}
	cpu = task_cpu(t);
	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
		 t, ".I"[is_idle_task(t)],
		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
		 t->rcu_tasks_idle_cpu, cpu);
	sched_show_task(t);
}

/* Wait for one RCU-tasks grace period. */
static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
{
	struct task_struct *g, *t;
	unsigned long lastreport;
	LIST_HEAD(rcu_tasks_holdouts);
	int fract;

	/*
	 * Wait for all pre-existing t->on_rq and t->nvcsw transitions
	 * to complete.  Invoking synchronize_rcu() suffices because all
	 * these transitions occur with interrupts disabled.  Without this
	 * synchronize_rcu(), a read-side critical section that started
	 * before the grace period might be incorrectly seen as having
	 * started after the grace period.
	 *
	 * This synchronize_rcu() also dispenses with the need for a
	 * memory barrier on the first store to t->rcu_tasks_holdout,
	 * as it forces the store to happen after the beginning of the
	 * grace period.
	 */
	synchronize_rcu();

	/*
	 * There were callbacks, so we need to wait for an RCU-tasks
	 * grace period.  Start off by scanning the task list for tasks
	 * that are not already voluntarily blocked.  Mark these tasks
	 * and make a list of them in rcu_tasks_holdouts.
	 */
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
			get_task_struct(t);
			t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
			WRITE_ONCE(t->rcu_tasks_holdout, true);
			list_add(&t->rcu_tasks_holdout_list,
				 &rcu_tasks_holdouts);
		}
	}
	rcu_read_unlock();

	/*
	 * Wait for tasks that are in the process of exiting.  This
	 * does only part of the job, ensuring that all tasks that were
	 * previously exiting reach the point where they have disabled
	 * preemption, allowing the later synchronize_rcu() to finish
	 * the job.
	 */
	synchronize_srcu(&tasks_rcu_exit_srcu);

	/*
	 * Each pass through the following loop scans the list of holdout
	 * tasks, removing any that are no longer holdouts.  When the list
	 * is empty, we are done.
	 */
	lastreport = jiffies;

	/* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
	fract = 10;

	for (;;) {
		bool firstreport;
		bool needreport;
		int rtst;
		struct task_struct *t1;

		if (list_empty(&rcu_tasks_holdouts))
			break;

		/* Slowly back off waiting for holdouts. */
		schedule_timeout_interruptible(HZ/fract);

		if (fract > 1)
			fract--;

		rtst = READ_ONCE(rcu_task_stall_timeout);
		needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
		if (needreport)
			lastreport = jiffies;
		firstreport = true;
		WARN_ON(signal_pending(current));
		list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
					 rcu_tasks_holdout_list) {
			check_holdout_task(t, needreport, &firstreport);
			cond_resched();
		}
	}

	/*
	 * Because ->on_rq and ->nvcsw are not guaranteed to have full
	 * memory barriers prior to them in the schedule() path, memory
	 * reordering on other CPUs could cause their RCU-tasks read-side
	 * critical sections to extend past the end of the grace period.
	 * However, because these ->nvcsw updates are carried out with
	 * interrupts disabled, we can use synchronize_rcu() to force the
	 * needed ordering on all such CPUs.
	 *
	 * This synchronize_rcu() also confines all ->rcu_tasks_holdout
	 * accesses to be within the grace period, avoiding the need for
	 * memory barriers for ->rcu_tasks_holdout accesses.
	 *
	 * In addition, this synchronize_rcu() waits for exiting tasks
	 * to complete their final preempt_disable() region of execution,
	 * cleaning up after the synchronize_srcu() above.
	 */
	synchronize_rcu();
}

void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks);

/**
 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks() assumes
 * that the read-side critical sections end at a voluntary context
 * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
 * or transition to usermode execution.  As such, there are no read-side
 * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
 * this primitive is intended to determine that all tasks have passed
 * through a safe state, not so much for data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);

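/*
 * Hypothetical usage sketch (the names below are illustrative, not kernel
 * symbols): queue a callback to free a dynamically allocated trampoline-like
 * descriptor only after every task has passed through a voluntary context
 * switch, idle, or usermode, so that no task can still be executing in it.
 * Assumes <linux/slab.h> for kfree().
 */
#if 0	/* Example only; not built. */
struct my_tramp {
	struct rcu_head rh;
	void *data;
};

static void my_tramp_free_cb(struct rcu_head *rhp)
{
	struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);

	kfree(tp);	/* Safe: no task can still be using tp. */
}

static void my_tramp_release(struct my_tramp *tp)
{
	/* Unpublish tp from all callers first, then: */
	call_rcu_tasks(&tp->rh, my_tramp_free_cb);
}
#endif
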
/**
 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function
 * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 * is not (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);

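/*
 * Hypothetical usage sketch (illustrative names only): the synchronous form
 * is handy when the caller can sleep and simply wants to know that no task
 * is still running in code or data it is about to reclaim.
 */
#if 0	/* Example only; not built. */
struct my_hook {
	void *tramp;	/* kmalloc()ed trampoline-like buffer (illustrative). */
};

static void my_remove_hook(struct my_hook *hook)
{
	/* Step 1: unpatch the call site so no new calls enter hook->tramp. */
	/*         (architecture-specific, elided)                          */

	/* Step 2: wait for tasks already executing in hook->tramp to leave. */
	synchronize_rcu_tasks();

	/* Step 3: nothing can still be running there, so reclaim it. */
	kfree(hook->tramp);
	kfree(hook);
}
#endif
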
/**
 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);

static int __init rcu_spawn_tasks_kthread(void)
{
	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
	return 0;
}
core_initcall(rcu_spawn_tasks_kthread);

#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_TASKS_RUDE_RCU

////////////////////////////////////////////////////////////////////////
//
// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
// passing an empty function to schedule_on_each_cpu().  This approach
// provides an asynchronous call_rcu_tasks_rude() API and batching of
// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
// This sends IPIs far and wide and induces otherwise unnecessary context
// switches on all online CPUs, whether idle or not.

// Empty function to allow workqueues to force a context switch.
static void rcu_tasks_be_rude(struct work_struct *work)
{
}

// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
	schedule_on_each_cpu(rcu_tasks_be_rude);
}

void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude);

/**
 * call_rcu_tasks_rude() - Queue an RCU callback for invocation after a rude task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks_rude()
 * assumes that the read-side critical sections end at context switch,
 * cond_resched_rcu_qs(), or transition to usermode execution.  As such,
 * there are no read-side primitives analogous to rcu_read_lock() and
 * rcu_read_unlock() because this primitive is intended to determine
 * that all tasks have passed through a safe state, not so much for
 * data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);

/**
 * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
 *
 * Control will return to the caller some time after a rude rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
 * anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function preambles
 * and profiling hooks.  The synchronize_rcu_tasks_rude() function is not
 * (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks_rude(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);

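/*
 * Hypothetical usage sketch (illustrative only): as described above, a rude
 * grace period is implemented by forcing a context switch on every online
 * CPU, so a caller that can sleep might use the synchronous form to wait
 * out all pre-existing rude readers before reclaiming an object:
 */
#if 0	/* Example only; not built. */
static void my_rude_reclaim(void *obj)
{
	/* Unpublish all pointers to "obj" first (elided). */

	synchronize_rcu_tasks_rude();	/* Waits via schedule_on_each_cpu(). */

	kfree(obj);	/* Assumes obj was kmalloc()ed; illustrative. */
}
#endif
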
/**
 * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks_rude(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks_rude();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);

static int __init rcu_spawn_tasks_rude_kthread(void)
{
	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
	return 0;
}
core_initcall(rcu_spawn_tasks_rude_kthread);

#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */