/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Task-based RCU implementations.
 *
 * Copyright (C) 2020 Paul E. McKenney
 */

////////////////////////////////////////////////////////////////////////
//
// Generic data structures.

struct rcu_tasks;
typedef void (*rcu_tasks_gp_func_t)(struct rcu_tasks *rtp);

/**
 * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
 * @cbs_head: Head of callback list.
 * @cbs_tail: Tail pointer for callback list.
 * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
 * @cbs_lock: Lock protecting callback list.
 * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
 * @gp_func: This flavor's grace-period-wait function.
 * @call_func: This flavor's call_rcu()-equivalent function.
 */
struct rcu_tasks {
	struct rcu_head *cbs_head;
	struct rcu_head **cbs_tail;
	struct wait_queue_head cbs_wq;
	raw_spinlock_t cbs_lock;
	struct task_struct *kthread_ptr;
	rcu_tasks_gp_func_t gp_func;
	call_rcu_func_t call_func;
};

#define DEFINE_RCU_TASKS(name, gp, call)				\
static struct rcu_tasks name =						\
{									\
	.cbs_tail = &name.cbs_head,					\
	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(name.cbs_wq),		\
	.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(name.cbs_lock),		\
	.gp_func = gp,							\
	.call_func = call,						\
}

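/*
 * For illustration: expanding DEFINE_RCU_TASKS(rcu_tasks,
 * rcu_tasks_wait_gp, call_rcu_tasks), as is done for the flavors later in
 * this file, yields approximately the following static instance:
 *
 *	static struct rcu_tasks rcu_tasks = {
 *		.cbs_tail  = &rcu_tasks.cbs_head,
 *		.cbs_wq    = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_tasks.cbs_wq),
 *		.cbs_lock  = __RAW_SPIN_LOCK_UNLOCKED(rcu_tasks.cbs_lock),
 *		.gp_func   = rcu_tasks_wait_gp,
 *		.call_func = call_rcu_tasks,
 *	};
 */
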
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);

/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
module_param(rcu_task_stall_timeout, int, 0644);

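/*
 * Because this file is #included from kernel/rcu/update.c, the parameter
 * above is normally exposed as rcupdate.rcu_task_stall_timeout.  For
 * example, to set the stall timeout to roughly 30 seconds (the value is
 * in jiffies, so this assumes HZ=1000), one would typically boot with:
 *
 *	rcupdate.rcu_task_stall_timeout=30000
 *
 * or adjust it at runtime via
 *	/sys/module/rcupdate/parameters/rcu_task_stall_timeout
 */
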
////////////////////////////////////////////////////////////////////////
//
// Generic code.

// Enqueue a callback for the specified flavor of Tasks RCU.
static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
				   struct rcu_tasks *rtp)
{
	unsigned long flags;
	bool needwake;

	rhp->next = NULL;
	rhp->func = func;
	raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
	needwake = !rtp->cbs_head;
	WRITE_ONCE(*rtp->cbs_tail, rhp);
	rtp->cbs_tail = &rhp->next;
	raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
	/* We can't create the thread unless interrupts are enabled. */
	if (needwake && READ_ONCE(rtp->kthread_ptr))
		wake_up(&rtp->cbs_wq);
}
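
/*
 * Note on the queueing scheme above: an empty list has ->cbs_head == NULL
 * and ->cbs_tail == &->cbs_head, so the first enqueue stores the new
 * callback through ->cbs_tail directly into ->cbs_head.  That is also why
 * "needwake" is computed from ->cbs_head being NULL before the store: only
 * the transition from empty to non-empty needs to wake the kthread.
 */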

// Wait for a grace period for the specified flavor of Tasks RCU.
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
	/* Complain if the scheduler has not started. */
	RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
			 "synchronize_rcu_tasks called too soon");

	/* Wait for the grace period. */
	wait_rcu_gp(rtp->call_func);
}

/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
	unsigned long flags;
	struct rcu_head *list;
	struct rcu_head *next;
	struct rcu_tasks *rtp = arg;

	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
	housekeeping_affine(current, HK_FLAG_RCU);
	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!

	/*
	 * Each pass through the following loop makes one check for
	 * newly arrived callbacks, and, if there are some, waits for
	 * one RCU-tasks grace period and then invokes the callbacks.
	 * This loop is terminated by the system going down.  ;-)
	 */
	for (;;) {

		/* Pick up any new callbacks. */
		raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
		list = rtp->cbs_head;
		rtp->cbs_head = NULL;
		rtp->cbs_tail = &rtp->cbs_head;
		raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);

		/* If there were none, wait a bit and start over. */
		if (!list) {
			wait_event_interruptible(rtp->cbs_wq,
						 READ_ONCE(rtp->cbs_head));
			if (!rtp->cbs_head) {
				WARN_ON(signal_pending(current));
				schedule_timeout_interruptible(HZ/10);
			}
			continue;
		}

		// Wait for one grace period.
		rtp->gp_func(rtp);

		/* Invoke the callbacks. */
		while (list) {
			next = list->next;
			local_bh_disable();
			list->func(list);
			local_bh_enable();
			list = next;
			cond_resched();
		}
		/* Paranoid sleep to keep this from entering a tight loop. */
		schedule_timeout_uninterruptible(HZ/10);
	}
}

/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
{
	struct task_struct *t;

	t = kthread_run(rcu_tasks_kthread, rtp, "rcu_tasks_kthread");
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__))
		return;
	smp_mb(); /* Ensure others see full kthread. */
}

/* Do the srcu_read_lock() for the synchronize_srcu() in rcu_tasks_wait_gp(). */
void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu)
{
	preempt_disable();
	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu);
	preempt_enable();
}

/* Do the srcu_read_unlock() for the synchronize_srcu() in rcu_tasks_wait_gp(). */
void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu)
{
	preempt_disable();
	__srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx);
	preempt_enable();
}

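/*
 * Usage sketch (hedged; the actual call sites live in the task-exit path,
 * outside this file): the exit code brackets its final teardown region
 * with these two hooks so that the synchronize_srcu(&tasks_rcu_exit_srcu)
 * in rcu_tasks_wait_gp() below can wait for tasks that are mid-exit:
 *
 *	exit_tasks_rcu_start();
 *	... task-exit processing that Tasks RCU must not ignore ...
 *	exit_tasks_rcu_finish();
 */
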
#ifndef CONFIG_TINY_RCU

/*
 * Print any non-default Tasks RCU settings.
 */
static void __init rcu_tasks_bootup_oddness(void)
{
#ifdef CONFIG_TASKS_RCU
	if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
		pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
	else
		pr_info("\tTasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_RUDE_RCU
	pr_info("\tRude variant of Tasks RCU enabled.\n");
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
}

#endif /* #ifndef CONFIG_TINY_RCU */

#ifdef CONFIG_TASKS_RCU

////////////////////////////////////////////////////////////////////////
//
// Simple variant of RCU whose quiescent states are voluntary context
// switch, cond_resched_rcu_qs(), user-space execution, and idle.
// As such, grace periods can take one good long time.  There are no
// read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
// because this implementation is intended to get the system into a safe
// state for some of the manipulations involved in tracing and the like.
// Finally, this implementation does not support high call_rcu_tasks()
// rates from multiple CPUs.  If this is required, per-CPU callback lists
// will be needed.

/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
			       bool needreport, bool *firstreport)
{
	int cpu;

	if (!READ_ONCE(t->rcu_tasks_holdout) ||
	    t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
	    !READ_ONCE(t->on_rq) ||
	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
		WRITE_ONCE(t->rcu_tasks_holdout, false);
		list_del_init(&t->rcu_tasks_holdout_list);
		put_task_struct(t);
		return;
	}
	rcu_request_urgent_qs_task(t);
	if (!needreport)
		return;
	if (*firstreport) {
		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
		*firstreport = false;
	}
	cpu = task_cpu(t);
	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
		 t, ".I"[is_idle_task(t)],
		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
		 t->rcu_tasks_idle_cpu, cpu);
	sched_show_task(t);
}

/* Wait for one RCU-tasks grace period. */
static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
{
	struct task_struct *g, *t;
	unsigned long lastreport;
	LIST_HEAD(rcu_tasks_holdouts);
	int fract;

	/*
	 * Wait for all pre-existing t->on_rq and t->nvcsw transitions
	 * to complete.  Invoking synchronize_rcu() suffices because all
	 * these transitions occur with interrupts disabled.  Without this
	 * synchronize_rcu(), a read-side critical section that started
	 * before the grace period might be incorrectly seen as having
	 * started after the grace period.
	 *
	 * This synchronize_rcu() also dispenses with the need for a
	 * memory barrier on the first store to t->rcu_tasks_holdout,
	 * as it forces the store to happen after the beginning of the
	 * grace period.
	 */
	synchronize_rcu();

	/*
	 * There were callbacks, so we need to wait for an RCU-tasks
	 * grace period.  Start off by scanning the task list for tasks
	 * that are not already voluntarily blocked.  Mark these tasks
	 * and make a list of them in rcu_tasks_holdouts.
	 */
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
			get_task_struct(t);
			t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
			WRITE_ONCE(t->rcu_tasks_holdout, true);
			list_add(&t->rcu_tasks_holdout_list,
				 &rcu_tasks_holdouts);
		}
	}
	rcu_read_unlock();

	/*
	 * Wait for tasks that are in the process of exiting.  This
	 * does only part of the job, ensuring that all tasks that were
	 * previously exiting reach the point where they have disabled
	 * preemption, allowing the later synchronize_rcu() to finish
	 * the job.
	 */
	synchronize_srcu(&tasks_rcu_exit_srcu);

	/*
	 * Each pass through the following loop scans the list of holdout
	 * tasks, removing any that are no longer holdouts.  When the list
	 * is empty, we are done.
	 */
	lastreport = jiffies;

	/* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
	fract = 10;

	for (;;) {
		bool firstreport;
		bool needreport;
		int rtst;
		struct task_struct *t1;

		if (list_empty(&rcu_tasks_holdouts))
			break;

		/* Slowly back off waiting for holdouts. */
		schedule_timeout_interruptible(HZ/fract);

		if (fract > 1)
			fract--;

		rtst = READ_ONCE(rcu_task_stall_timeout);
		needreport = rtst > 0 && time_after(jiffies, lastreport + rtst);
		if (needreport)
			lastreport = jiffies;
		firstreport = true;
		WARN_ON(signal_pending(current));
		list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
					 rcu_tasks_holdout_list) {
			check_holdout_task(t, needreport, &firstreport);
			cond_resched();
		}
	}

	/*
	 * Because ->on_rq and ->nvcsw are not guaranteed to have full
	 * memory barriers prior to them in the schedule() path, memory
	 * reordering on other CPUs could cause their RCU-tasks read-side
	 * critical sections to extend past the end of the grace period.
	 * However, because these ->nvcsw updates are carried out with
	 * interrupts disabled, we can use synchronize_rcu() to force the
	 * needed ordering on all such CPUs.
	 *
	 * This synchronize_rcu() also confines all ->rcu_tasks_holdout
	 * accesses to be within the grace period, avoiding the need for
	 * memory barriers for ->rcu_tasks_holdout accesses.
	 *
	 * In addition, this synchronize_rcu() waits for exiting tasks
	 * to complete their final preempt_disable() region of execution,
	 * cleaning up after the synchronize_srcu() above.
	 */
	synchronize_rcu();
}

void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks);

/**
 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks() assumes
 * that the read-side critical sections end at a voluntary context
 * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
 * or transition to usermode execution.  As such, there are no read-side
 * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
 * this primitive is intended to determine that all tasks have passed
 * through a safe state, not so much for data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);

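/*
 * Hypothetical usage sketch (the names below are illustrative, not kernel
 * symbols): queue a callback to free a dynamically allocated trampoline-like
 * descriptor only after every task has passed through a voluntary context
 * switch, idle, or usermode, so that no task can still be executing in it.
 * Assumes <linux/slab.h> for kfree().
 */
#if 0	/* Example only; not built. */
struct my_tramp {
	struct rcu_head rh;
	void *data;
};

static void my_tramp_free_cb(struct rcu_head *rhp)
{
	struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);

	kfree(tp);	/* Safe: no task can still be using tp. */
}

static void my_tramp_release(struct my_tramp *tp)
{
	/* Unpublish tp from all callers first, then: */
	call_rcu_tasks(&tp->rh, my_tramp_free_cb);
}
#endif
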
/**
 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function
 * preambles and profiling hooks.  The synchronize_rcu_tasks() function
 * is not (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);

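/*
 * Hypothetical usage sketch (illustrative names only): the synchronous form
 * is handy when the caller can sleep and simply wants to know that no task
 * is still running in code or data it is about to reclaim.
 */
#if 0	/* Example only; not built. */
struct my_hook {
	void *tramp;	/* kmalloc()ed trampoline-like buffer (illustrative). */
};

static void my_remove_hook(struct my_hook *hook)
{
	/* Step 1: unpatch the call site so no new calls enter hook->tramp. */
	/*         (architecture-specific, elided)                          */

	/* Step 2: wait for tasks already executing in hook->tramp to leave. */
	synchronize_rcu_tasks();

	/* Step 3: nothing can still be running there, so reclaim it. */
	kfree(hook->tramp);
	kfree(hook);
}
#endif
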
/**
 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);

static int __init rcu_spawn_tasks_kthread(void)
{
	rcu_spawn_tasks_kthread_generic(&rcu_tasks);
	return 0;
}
core_initcall(rcu_spawn_tasks_kthread);

#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_TASKS_RUDE_RCU

////////////////////////////////////////////////////////////////////////
//
// "Rude" variant of Tasks RCU, inspired by Steve Rostedt's trick of
// passing an empty function to schedule_on_each_cpu().  This approach
// provides an asynchronous call_rcu_tasks_rude() API and batching of
// concurrent calls to the synchronous synchronize_rcu_tasks_rude() API.
// This sends IPIs far and wide and induces otherwise unnecessary context
// switches on all online CPUs, whether idle or not.

// Empty function to allow workqueues to force a context switch.
static void rcu_tasks_be_rude(struct work_struct *work)
{
}

// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
	schedule_on_each_cpu(rcu_tasks_be_rude);
}

void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_rude, rcu_tasks_rude_wait_gp, call_rcu_tasks_rude);

/**
 * call_rcu_tasks_rude() - Queue an RCU callback for invocation after a rude task-based grace period
 * @rhp: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed.  call_rcu_tasks_rude()
 * assumes that the read-side critical sections end at context switch,
 * cond_resched_rcu_qs(), or transition to usermode execution.  As such,
 * there are no read-side primitives analogous to rcu_read_lock() and
 * rcu_read_unlock() because this primitive is intended to determine
 * that all tasks have passed through a safe state, not so much for
 * data-structure synchronization.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
void call_rcu_tasks_rude(struct rcu_head *rhp, rcu_callback_t func)
{
	call_rcu_tasks_generic(rhp, func, &rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(call_rcu_tasks_rude);

/**
 * synchronize_rcu_tasks_rude - wait for a rude rcu-tasks grace period
 *
 * Control will return to the caller some time after a rude rcu-tasks
 * grace period has elapsed, in other words after all currently
 * executing rcu-tasks read-side critical sections have completed.  These
 * read-side critical sections are delimited by calls to schedule(),
 * cond_resched_tasks_rcu_qs(), userspace execution, and (in theory,
 * anyway) cond_resched().
 *
 * This is a very specialized primitive, intended only for a few uses in
 * tracing and other situations requiring manipulation of function preambles
 * and profiling hooks.  The synchronize_rcu_tasks_rude() function is not
 * (yet) intended for heavy use from multiple CPUs.
 *
 * See the description of synchronize_rcu() for more detailed information
 * on memory ordering guarantees.
 */
void synchronize_rcu_tasks_rude(void)
{
	synchronize_rcu_tasks_generic(&rcu_tasks_rude);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);

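/*
 * Hypothetical usage sketch (illustrative only): as described above, a rude
 * grace period is implemented by forcing a context switch on every online
 * CPU, so a caller that can sleep might use the synchronous form to wait
 * out all pre-existing rude readers before reclaiming an object:
 */
#if 0	/* Example only; not built. */
static void my_rude_reclaim(void *obj)
{
	/* Unpublish all pointers to "obj" first (elided). */

	synchronize_rcu_tasks_rude();	/* Waits via schedule_on_each_cpu(). */

	kfree(obj);	/* Assumes obj was kmalloc()ed; illustrative. */
}
#endif
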
/**
 * rcu_barrier_tasks_rude - Wait for in-flight call_rcu_tasks_rude() callbacks.
 *
 * Although the current implementation is guaranteed to wait, it is not
 * obligated to, for example, if there are no pending callbacks.
 */
void rcu_barrier_tasks_rude(void)
{
	/* There is only one callback queue, so this is easy.  ;-) */
	synchronize_rcu_tasks_rude();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);

static int __init rcu_spawn_tasks_rude_kthread(void)
{
	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
	return 0;
}
core_initcall(rcu_spawn_tasks_rude_kthread);

#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */