Merge tag '5.2-rc-smb3-fixes' of git://git.samba.org/sfrench/cifs-2.6
[linux-2.6-block.git] / kernel / rcu / rcu.h
CommitLineData
b5b11890 1/* SPDX-License-Identifier: GPL-2.0+ */
29c00b4a
PM
2/*
3 * Read-Copy Update definitions shared among RCU implementations.
4 *
29c00b4a
PM
5 * Copyright IBM Corporation, 2011
6 *
b5b11890 7 * Author: Paul E. McKenney <paulmck@linux.ibm.com>
29c00b4a
PM
8 */
9
10#ifndef __LINUX_RCU_H
11#define __LINUX_RCU_H
12
5cb5c6e1 13#include <trace/events/rcu.h>
e99033c5 14
c2d8089d 15/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
84585aa8 16#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
6136d6e4 17
2e8c28c2
PM
18
19/*
20 * Grace-period counter management.
21 */
22
f1ec57a4 23#define RCU_SEQ_CTR_SHIFT 2
031aeee0
PM
24#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1)
25
26/*
27 * Return the counter portion of a sequence number previously returned
28 * by rcu_seq_snap() or rcu_seq_current().
29 */
30static inline unsigned long rcu_seq_ctr(unsigned long s)
31{
32 return s >> RCU_SEQ_CTR_SHIFT;
33}
34
35/*
36 * Return the state portion of a sequence number previously returned
37 * by rcu_seq_snap() or rcu_seq_current().
38 */
39static inline int rcu_seq_state(unsigned long s)
40{
41 return s & RCU_SEQ_STATE_MASK;
42}
43
80a7956f
PM
44/*
45 * Set the state portion of the pointed-to sequence number.
46 * The caller is responsible for preventing conflicting updates.
47 */
48static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
49{
50 WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
51 WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
52}
53
2e8c28c2
PM
/*
 * Mark the start of an update-side operation: bump the sequence number
 * by one, then issue a full barrier.  Warns (once) if the resulting
 * state field is not 1.
 */
static inline void rcu_seq_start(unsigned long *sp)
{
	unsigned long started = *sp + 1;

	WRITE_ONCE(*sp, started);
	smp_mb(); /* Order the update-side operation after the increment. */
	WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
}
61
9a414201
PM
62/* Compute the end-of-grace-period value for the specified sequence number. */
63static inline unsigned long rcu_seq_endval(unsigned long *sp)
64{
65 return (*sp | RCU_SEQ_STATE_MASK) + 1;
66}
67
2e8c28c2
PM
/*
 * Mark the end of an update-side operation: issue a full barrier, warn
 * (once) if the state field is zero, then store the value computed by
 * rcu_seq_endval().
 */
static inline void rcu_seq_end(unsigned long *sp)
{
	smp_mb(); /* Order the update-side operation before the increment. */
	WARN_ON_ONCE(rcu_seq_state(*sp) == 0);
	WRITE_ONCE(*sp, rcu_seq_endval(sp));
}
75
0d805a70
JFG
76/*
77 * rcu_seq_snap - Take a snapshot of the update side's sequence number.
78 *
79 * This function returns the earliest value of the grace-period sequence number
80 * that will indicate that a full grace period has elapsed since the current
81 * time. Once the grace-period sequence number has reached this value, it will
82 * be safe to invoke all callbacks that have been registered prior to the
83 * current time. This value is the current grace-period number plus two to the
84 * power of the number of low-order bits reserved for state, then rounded up to
85 * the next value in which the state bits are all zero.
86 */
2e8c28c2
PM
87static inline unsigned long rcu_seq_snap(unsigned long *sp)
88{
89 unsigned long s;
90
031aeee0 91 s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
2e8c28c2
PM
92 smp_mb(); /* Above access must not bleed into critical section. */
93 return s;
94}
95
8660b7d8
PM
/* Return the current value of the update side's sequence number, no ordering. */
static inline unsigned long rcu_seq_current(unsigned long *sp)
{
	unsigned long cur = READ_ONCE(*sp);

	return cur;
}
101
2e3e5e55
PM
102/*
103 * Given a snapshot from rcu_seq_snap(), determine whether or not the
104 * corresponding update-side operation has started.
105 */
106static inline bool rcu_seq_started(unsigned long *sp, unsigned long s)
107{
108 return ULONG_CMP_LT((s - 1) & ~RCU_SEQ_STATE_MASK, READ_ONCE(*sp));
109}
110
2e8c28c2
PM
111/*
112 * Given a snapshot from rcu_seq_snap(), determine whether or not a
113 * full update-side operation has occurred.
114 */
115static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
116{
117 return ULONG_CMP_GE(READ_ONCE(*sp), s);
118}
119
67e14c1e
PM
120/*
121 * Has a grace period completed since the time the old gp_seq was collected?
122 */
123static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
124{
125 return ULONG_CMP_LT(old, new & ~RCU_SEQ_STATE_MASK);
126}
127
128/*
129 * Has a grace period started since the time the old gp_seq was collected?
130 */
131static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new)
132{
133 return ULONG_CMP_LT((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK,
134 new);
135}
136
d7219312
PM
137/*
138 * Roughly how many full grace periods have elapsed between the collection
139 * of the two specified grace periods?
140 */
141static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
142{
2ee5aca5
PM
143 unsigned long rnd_diff;
144
145 if (old == new)
146 return 0;
147 /*
148 * Compute the number of grace periods (still shifted up), plus
149 * one if either of new and old is not an exact grace period.
150 */
151 rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
152 ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
153 ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
154 if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
155 return 1; /* Definitely no grace period has elapsed. */
156 return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
d7219312
PM
157}
158
29c00b4a
PM
/*
 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
 * by call_rcu() and rcu callback execution, and are therefore not part
 * of the RCU API. They live in this shared header because they are used
 * by all RCU implementations.
 */
165
166#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
167# define STATE_RCU_HEAD_READY 0
168# define STATE_RCU_HEAD_QUEUED 1
169
170extern struct debug_obj_descr rcuhead_debug_descr;
171
ae150184 172static inline int debug_rcu_head_queue(struct rcu_head *head)
29c00b4a 173{
ae150184
PM
174 int r1;
175
176 r1 = debug_object_activate(head, &rcuhead_debug_descr);
29c00b4a
PM
177 debug_object_active_state(head, &rcuhead_debug_descr,
178 STATE_RCU_HEAD_READY,
179 STATE_RCU_HEAD_QUEUED);
ae150184 180 return r1;
29c00b4a
PM
181}
182
183static inline void debug_rcu_head_unqueue(struct rcu_head *head)
184{
185 debug_object_active_state(head, &rcuhead_debug_descr,
186 STATE_RCU_HEAD_QUEUED,
187 STATE_RCU_HEAD_READY);
188 debug_object_deactivate(head, &rcuhead_debug_descr);
189}
190#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
/* No-op variant used without CONFIG_DEBUG_OBJECTS_RCU_HEAD: always returns 0. */
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
	return 0;
}

/* No-op variant used without CONFIG_DEBUG_OBJECTS_RCU_HEAD. */
static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
}
199#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
200
bd73a7f5 201void kfree(const void *);
29c00b4a 202
406e3e53
PM
203/*
204 * Reclaim the specified callback, either by invoking it (non-lazy case)
205 * or freeing it directly (lazy case). Return true if lazy, false otherwise.
206 */
e66c33d5 207static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
29c00b4a 208{
74de6960 209 rcu_callback_t f;
29c00b4a
PM
210 unsigned long offset = (unsigned long)head->func;
211
24ef659a 212 rcu_lock_acquire(&rcu_callback_map);
29c00b4a 213 if (__is_kfree_rcu_offset(offset)) {
4f5fbd78 214 trace_rcu_invoke_kfree_callback(rn, head, offset);
29c00b4a 215 kfree((void *)head - offset);
24ef659a 216 rcu_lock_release(&rcu_callback_map);
406e3e53 217 return true;
29c00b4a 218 } else {
4f5fbd78 219 trace_rcu_invoke_callback(rn, head);
74de6960
PM
220 f = head->func;
221 WRITE_ONCE(head->func, (rcu_callback_t)0L);
222 f(head);
24ef659a 223 rcu_lock_release(&rcu_callback_map);
406e3e53 224 return false;
29c00b4a
PM
225 }
226}
227
6bfc09e2
PM
228#ifdef CONFIG_RCU_STALL_COMMON
229
230extern int rcu_cpu_stall_suppress;
10462d6f 231extern int rcu_cpu_stall_timeout;
6bfc09e2
PM
232int rcu_jiffies_till_stall_check(void);
233
f22ce091
PM
/* Set rcu_cpu_stall_suppress to 3 unless it is already nonzero. */
#define rcu_ftrace_dump_stall_suppress() \
do { \
	if (rcu_cpu_stall_suppress == 0) \
		rcu_cpu_stall_suppress = 3; \
} while (0)
239
/* Reset rcu_cpu_stall_suppress to 0, but only if it currently holds 3. */
#define rcu_ftrace_dump_stall_unsuppress() \
do { \
	if (rcu_cpu_stall_suppress == 3) \
		rcu_cpu_stall_suppress = 0; \
} while (0)
245
#else /* #ifdef CONFIG_RCU_STALL_COMMON */
/*
 * No-op variants.  Expand to an empty do-while so that each use is a
 * single well-formed statement, matching the other no-op macro stubs in
 * this file.
 */
#define rcu_ftrace_dump_stall_suppress() do { } while (0)
#define rcu_ftrace_dump_stall_unsuppress() do { } while (0)
6bfc09e2
PM
249#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
250
0d752924
PM
/*
 * Strings passed to tracepoints must be exported through the tracing
 * system so that tools such as perf and trace-cmd can translate the
 * string address pointers back into text.  TPS() is shorthand for that.
 */
#define TPS(x) tracepoint_string(x)
257
b8989b76
PM
/*
 * Dump the ftrace buffer, but only one time per callsite per boot.
 * A per-callsite static flag is tested cheaply first and then claimed
 * with atomic_xchg(); only the claimant turns tracing off and dumps,
 * with stall warnings suppressed for the duration of the dump.
 */
#define rcu_ftrace_dump(oops_dump_mode) \
do { \
	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
	\
	if (!atomic_read(&___rfd_beenhere)) { \
		if (!atomic_xchg(&___rfd_beenhere, 1)) { \
			tracing_off(); \
			rcu_ftrace_dump_stall_suppress(); \
			ftrace_dump(oops_dump_mode); \
			rcu_ftrace_dump_stall_unsuppress(); \
		} \
	} \
} while (0)
273
aa23c6fb 274void rcu_early_boot_tests(void);
52d7e48b 275void rcu_test_sync_prims(void);
aa23c6fb 276
5f6130fa
LJ
277/*
278 * This function really isn't for public consumption, but RCU is special in
279 * that context switches can allow the state machine to make progress.
280 */
281extern void resched_cpu(int cpu);
282
2b34c43c
PM
283#if defined(SRCU) || !defined(TINY_RCU)
284
285#include <linux/rcu_node_tree.h>
286
287extern int rcu_num_lvls;
e95d68d2 288extern int num_rcu_lvl[];
2b34c43c
PM
289extern int rcu_num_nodes;
290static bool rcu_fanout_exact;
291static int rcu_fanout_leaf;
292
293/*
294 * Compute the per-level fanout, either using the exact fanout specified
295 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
296 */
297static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
298{
299 int i;
300
301 if (rcu_fanout_exact) {
302 levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
303 for (i = rcu_num_lvls - 2; i >= 0; i--)
304 levelspread[i] = RCU_FANOUT;
305 } else {
306 int ccur;
307 int cprv;
308
309 cprv = nr_cpu_ids;
310 for (i = rcu_num_lvls - 1; i >= 0; i--) {
311 ccur = levelcnt[i];
312 levelspread[i] = (cprv + ccur - 1) / ccur;
313 cprv = ccur;
314 }
315 }
316}
317
/* Pointer to the first leaf rcu_node structure (start of the last level). */
#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])

/* Does this rcu_node sit on the last (leaf) level? */
#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)

/* Is this rcu_node the final element of rcu_state.node[], i.e. the last leaf? */
#define rcu_is_last_leaf_node(rnp) ((rnp) == &rcu_state.node[rcu_num_nodes - 1])
5257514d 326
/*
 * Walk every {s,}rcu_node structure in array order, which is a full
 * breadth-first scan of the tree.  The srcu_ form takes the state
 * structure to scan (for SRCU); the rcu_ form scans the one and only
 * rcu_state structure (for RCU).
 */
#define srcu_for_each_node_breadth_first(sp, rnp) \
	for ((rnp) = &(sp)->node[0]; \
	     (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++)
#define rcu_for_each_node_breadth_first(rnp) \
	srcu_for_each_node_breadth_first(&rcu_state, rnp)
efbe451d
PM
337
/*
 * Walk only the leaves of the rcu_node hierarchy of the rcu_state
 * structure.  A singleton tree consisting of a single rcu_node structure
 * -is- visited: that structure is a leaf even though it is also the root.
 */
#define rcu_for_each_leaf_node(rnp) \
	for ((rnp) = rcu_first_leaf_node(); \
	     (rnp) < &rcu_state.node[rcu_num_nodes]; (rnp)++)
efbe451d
PM
347
/*
 * Iterate over all possible CPUs in a leaf RCU node, from the first
 * possible CPU at or after (rnp)->grplo through (rnp)->grphi.
 *
 * Every use of @rnp is parenthesized so that an expression argument
 * (for example "base + 1") expands correctly.
 */
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
	for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
	     (cpu) <= (rnp)->grphi; \
	     (cpu) = cpumask_next((cpu), cpu_possible_mask))
355
/*
 * Iterate over all CPUs in a leaf RCU node's specified mask.
 *
 * rcu_find_next_bit() returns (rnp)->grplo plus the index of the next set
 * bit in @mask at or after bit @cpu; the bit index is relative to
 * (rnp)->grplo.  The loop ends once the result exceeds (rnp)->grphi.
 *
 * Every use of @rnp is parenthesized so that an expression argument
 * (for example "base + 1") expands correctly.
 */
#define rcu_find_next_bit(rnp, cpu, mask) \
	((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
	for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
	     (cpu) <= (rnp)->grphi; \
	     (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp)->grplo, (mask)))
efbe451d 365
83d40bd3
PM
/*
 * Wrappers for the rcu_node::lock acquire and release.
 *
 * Because the rcu_nodes form a tree, the tree traversal locking will observe
 * different lock values.  This in turn means that an UNLOCK of one level
 * followed by a LOCK of another level does not imply a full memory barrier
 * and, most importantly, that transitivity is lost.
 *
 * In order to restore full ordering between tree levels, augment the regular
 * lock acquire functions with smp_mb__after_unlock_lock().
 *
 * Because ->lock of struct rcu_node is a __private field, one should use
 * these wrappers rather than directly calling raw_spin_{lock,unlock}* on
 * ->lock.
 */
/* Acquire (p)->lock, then apply smp_mb__after_unlock_lock(). */
#define raw_spin_lock_rcu_node(p) \
do { \
	raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \
	smp_mb__after_unlock_lock(); \
} while (0)

/* Release (p)->lock. */
#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))

/* As raw_spin_lock_rcu_node(), using raw_spin_lock_irq(). */
#define raw_spin_lock_irq_rcu_node(p) \
do { \
	raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
	smp_mb__after_unlock_lock(); \
} while (0)

/* Release (p)->lock with raw_spin_unlock_irq(). */
#define raw_spin_unlock_irq_rcu_node(p) \
	raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))

/* As raw_spin_lock_rcu_node(), using raw_spin_lock_irqsave() with @flags. */
#define raw_spin_lock_irqsave_rcu_node(p, flags) \
do { \
	raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
	smp_mb__after_unlock_lock(); \
} while (0)

/* Release (p)->lock with raw_spin_unlock_irqrestore() and @flags. */
#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)

/*
 * Try to acquire (p)->lock.  The barrier is applied only when the lock
 * was obtained.  Evaluates to true on success, false otherwise.
 */
#define raw_spin_trylock_rcu_node(p) \
({ \
	bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \
	\
	if (___locked) \
		smp_mb__after_unlock_lock(); \
	___locked; \
})

/* lockdep assertion that (p)->lock is held. */
#define raw_lockdep_assert_held_rcu_node(p) \
	lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
417
2b34c43c
PM
418#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
419
e0fcba9a
PM
420#ifdef CONFIG_SRCU
421void srcu_init(void);
422#else /* #ifdef CONFIG_SRCU */
static inline void srcu_init(void)
{
	/* Nothing to do when CONFIG_SRCU is not set. */
}
424#endif /* #else #ifdef CONFIG_SRCU */
425
25c36329
PM
426#ifdef CONFIG_TINY_RCU
/*
 * Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny:
 * grace periods always report as normal and never as expedited, and the
 * remaining hooks do nothing.
 */
static inline bool rcu_gp_is_normal(void)
{
	return true;
}

static inline bool rcu_gp_is_expedited(void)
{
	return false;
}

static inline void rcu_expedite_gp(void)
{
}

static inline void rcu_unexpedite_gp(void)
{
}

static inline void rcu_request_urgent_qs_task(struct task_struct *t)
{
}
25c36329
PM
433#else /* #ifdef CONFIG_TINY_RCU */
434bool rcu_gp_is_normal(void); /* Internal RCU use. */
435bool rcu_gp_is_expedited(void); /* Internal RCU use. */
436void rcu_expedite_gp(void);
437void rcu_unexpedite_gp(void);
438void rcupdate_announce_bootup_oddness(void);
bfbd767d 439void rcu_request_urgent_qs_task(struct task_struct *t);
25c36329
PM
440#endif /* #else #ifdef CONFIG_TINY_RCU */
441
82118249
PM
442#define RCU_SCHEDULER_INACTIVE 0
443#define RCU_SCHEDULER_INIT 1
444#define RCU_SCHEDULER_RUNNING 2
445
cad7b389
PM
/*
 * Selector passed as test_type to the rcutorture/srcutorture
 * *_get_gp_data() helpers.  INVALID_RCU_FLAVOR is the final enumerator.
 */
enum rcutorture_type {
	RCU_FLAVOR,
	RCU_TASKS_FLAVOR,
	SRCU_FLAVOR,
	INVALID_RCU_FLAVOR
};
452
453#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
454void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
aebc8264 455 unsigned long *gp_seq);
cad7b389
PM
456void rcutorture_record_progress(unsigned long vernum);
457void do_trace_rcu_torture_read(const char *rcutorturename,
458 struct rcu_head *rhp,
459 unsigned long secs,
460 unsigned long c_old,
461 unsigned long c);
462#else
463static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
aebc8264 464 int *flags, unsigned long *gp_seq)
cad7b389
PM
465{
466 *flags = 0;
aebc8264 467 *gp_seq = 0;
cad7b389 468}
/* Stub: the version number is ignored in this configuration. */
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
cad7b389
PM
470#ifdef CONFIG_RCU_TRACE
471void do_trace_rcu_torture_read(const char *rcutorturename,
472 struct rcu_head *rhp,
473 unsigned long secs,
474 unsigned long c_old,
475 unsigned long c);
476#else
/* Without CONFIG_RCU_TRACE this expands to an empty statement. */
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
	do { } while (0)
479#endif
480#endif
481
482#ifdef CONFIG_TINY_SRCU
483
484static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
485 struct srcu_struct *sp, int *flags,
aebc8264 486 unsigned long *gp_seq)
cad7b389
PM
487{
488 if (test_type != SRCU_FLAVOR)
489 return;
490 *flags = 0;
aebc8264 491 *gp_seq = sp->srcu_idx;
cad7b389
PM
492}
493
494#elif defined(CONFIG_TREE_SRCU)
495
496void srcutorture_get_gp_data(enum rcutorture_type test_type,
497 struct srcu_struct *sp, int *flags,
aebc8264 498 unsigned long *gp_seq);
cad7b389 499
cad7b389
PM
500#endif
501
e3c8d51e 502#ifdef CONFIG_TINY_RCU
/* Tiny RCU stubs: every query returns 0 and every action is a no-op. */
static inline unsigned long rcu_get_gp_seq(void)
{
	return 0;
}

static inline unsigned long rcu_exp_batches_completed(void)
{
	return 0;
}

static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
{
	return 0;
}

static inline void rcu_force_quiescent_state(void)
{
}

static inline void show_rcu_gp_kthreads(void)
{
}

static inline int rcu_get_gp_kthreads_prio(void)
{
	return 0;
}

static inline void rcu_fwd_progress_check(unsigned long j)
{
}
e3c8d51e 511#else /* #ifdef CONFIG_TINY_RCU */
17ef2fe9 512unsigned long rcu_get_gp_seq(void);
e3c8d51e 513unsigned long rcu_exp_batches_completed(void);
5a0465e1 514unsigned long srcu_batches_completed(struct srcu_struct *sp);
e3c8d51e 515void show_rcu_gp_kthreads(void);
4babd855 516int rcu_get_gp_kthreads_prio(void);
e0aff973 517void rcu_fwd_progress_check(unsigned long j);
e3c8d51e 518void rcu_force_quiescent_state(void);
ad7c946b 519extern struct workqueue_struct *rcu_gp_wq;
25f3d7ef 520extern struct workqueue_struct *rcu_par_gp_wq;
e3c8d51e
PM
521#endif /* #else #ifdef CONFIG_TINY_RCU */
522
44c65ff2 523#ifdef CONFIG_RCU_NOCB_CPU
3d54f798 524bool rcu_is_nocb_cpu(int cpu);
5ab7ab83 525void rcu_bind_current_to_nocb(void);
3d54f798
PM
526#else
/* Without CONFIG_RCU_NOCB_CPU, no CPU is a no-CBs CPU. */
static inline bool rcu_is_nocb_cpu(int cpu)
{
	return false;
}

/* Without CONFIG_RCU_NOCB_CPU there is nothing to bind to. */
static inline void rcu_bind_current_to_nocb(void)
{
}
3d54f798
PM
529#endif
530
29c00b4a 531#endif /* __LINUX_RCU_H */