/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		http://lse.sourceforge.net/locking/rcupdate.html
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>

enum rcu_barrier {
	RCU_BARRIER_STD,
	RCU_BARRIER_BH,
	RCU_BARRIER_SCHED,
};

static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
static struct completion rcu_barrier_completion;
int rcu_scheduler_active __read_mostly;

static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
static struct rcu_head rcu_migrate_head[3];
static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);

/*
 * Awaken the corresponding synchronize_rcu() instance now that a
 * grace period has elapsed.
 */
void wakeme_after_rcu(struct rcu_head *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}

#ifdef CONFIG_TREE_PREEMPT_RCU

/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void synchronize_rcu(void)
{
	struct rcu_synchronize rcu;

	if (!rcu_scheduler_active)
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
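
/*
 * Usage sketch (illustration only, not part of this file): the classic
 * update-side pattern that synchronize_rcu() supports.  The names
 * "struct foo", gbl_foo, foo_lock, foo_get_a() and foo_update() are
 * hypothetical, and kfree() assumes <linux/slab.h>.
 */
struct foo {
	int a;
};
static struct foo *gbl_foo;		/* RCU-protected pointer */
static DEFINE_SPINLOCK(foo_lock);	/* serializes updaters only */

static int foo_get_a(void)
{
	int a;

	rcu_read_lock();		/* read-side critical section */
	a = rcu_dereference(gbl_foo)->a;
	rcu_read_unlock();
	return a;
}

static void foo_update(struct foo *new_fp)
{
	struct foo *old_fp;

	spin_lock(&foo_lock);
	old_fp = gbl_foo;
	rcu_assign_pointer(gbl_foo, new_fp);
	spin_unlock(&foo_lock);

	synchronize_rcu();	/* wait for pre-existing readers of old_fp */
	kfree(old_fp);		/* no reader can still hold a reference */
}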

#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */

/**
 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-sched
 * grace period has elapsed, in other words after all currently executing
 * rcu-sched read-side critical sections have completed. These read-side
 * critical sections are delimited by rcu_read_lock_sched() and
 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
 * local_irq_disable(), and so on may be used in place of
 * rcu_read_lock_sched().
 *
 * This means that all preempt_disable code sequences, including NMI and
 * hardware-interrupt handlers, in progress on entry will have completed
 * before this primitive returns. However, this does not guarantee that
 * softirq handlers will have completed, since in some kernels, these
 * handlers can run in process context, and can block.
 *
 * This primitive provides the guarantees made by the (now removed)
 * synchronize_kernel() API. In contrast, synchronize_rcu() only
 * guarantees that rcu_read_lock() sections will have completed.
 * In "classic RCU", these two guarantees happen to be one and
 * the same, but can differ in realtime RCU implementations.
 */
void synchronize_sched(void)
{
	struct rcu_synchronize rcu;

	if (rcu_blocking_is_gp())
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu_sched(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
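
/*
 * Usage sketch (illustration only, not part of this file): a reader whose
 * critical section is just preempt_disable()/preempt_enable(), paired with
 * synchronize_sched() on the update side.  The hook_fn, call_hook() and
 * disable_hook() names are hypothetical.
 */
static int (*hook_fn)(int arg);

static int call_hook(int arg)
{
	int (*fn)(int);
	int ret = -ENOENT;

	preempt_disable();		/* acts as rcu-sched read-side section */
	fn = rcu_dereference(hook_fn);
	if (fn)
		ret = fn(arg);
	preempt_enable();
	return ret;
}

static void disable_hook(void)
{
	rcu_assign_pointer(hook_fn, NULL);
	synchronize_sched();	/* every preempt-disabled call_hook() is done */
	/* now safe to free or unload whatever the old hook pointed at */
}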

/**
 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu_bh grace
 * period has elapsed, in other words after all currently executing rcu_bh
 * read-side critical sections have completed. RCU read-side critical
 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
 * and may be nested.
 */
void synchronize_rcu_bh(void)
{
	struct rcu_synchronize rcu;

	if (rcu_blocking_is_gp())
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu_bh(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
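
/*
 * Usage sketch (illustration only, not part of this file): data read mostly
 * from softirq context, protected by rcu_read_lock_bh() on the read side and
 * synchronize_rcu_bh() on the (rare) update side.  "struct bh_entry",
 * bh_table, bh_lookup() and bh_replace() are hypothetical names, and kfree()
 * assumes <linux/slab.h>.
 */
struct bh_entry {
	int val;
};
static struct bh_entry *bh_table;

static int bh_lookup(void)
{
	struct bh_entry *e;
	int val = -1;

	rcu_read_lock_bh();		/* also disables softirqs locally */
	e = rcu_dereference(bh_table);
	if (e)
		val = e->val;
	rcu_read_unlock_bh();
	return val;
}

static void bh_replace(struct bh_entry *new_e)
{
	struct bh_entry *old_e = bh_table;

	rcu_assign_pointer(bh_table, new_e);
	synchronize_rcu_bh();	/* all rcu_read_lock_bh() readers are done */
	kfree(old_e);
}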

static void rcu_barrier_callback(struct rcu_head *notused)
{
	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
		complete(&rcu_barrier_completion);
}

/*
 * Called with preemption disabled, and from cross-cpu IRQ context.
 */
static void rcu_barrier_func(void *type)
{
	int cpu = smp_processor_id();
	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);

	atomic_inc(&rcu_barrier_cpu_count);
	switch ((enum rcu_barrier)type) {
	case RCU_BARRIER_STD:
		call_rcu(head, rcu_barrier_callback);
		break;
	case RCU_BARRIER_BH:
		call_rcu_bh(head, rcu_barrier_callback);
		break;
	case RCU_BARRIER_SCHED:
		call_rcu_sched(head, rcu_barrier_callback);
		break;
	}
}

static inline void wait_migrated_callbacks(void)
{
	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
	smp_mb(); /* In case we didn't sleep. */
}

/*
 * Orchestrate the specified type of RCU barrier, waiting for all
 * RCU callbacks of the specified type to complete.
 */
static void _rcu_barrier(enum rcu_barrier type)
{
	BUG_ON(in_interrupt());
	/* Take mutex to serialize concurrent rcu_barrier() requests. */
	mutex_lock(&rcu_barrier_mutex);
	init_completion(&rcu_barrier_completion);
	/*
	 * Initialize rcu_barrier_cpu_count to 1, then invoke
	 * rcu_barrier_func() on each CPU, so that each CPU also has
	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
	 * might complete its grace period before all of the other CPUs
	 * did their increment, causing this function to return too
	 * early.
	 */
	atomic_set(&rcu_barrier_cpu_count, 1);
	on_each_cpu(rcu_barrier_func, (void *)type, 1);
	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
		complete(&rcu_barrier_completion);
	wait_for_completion(&rcu_barrier_completion);
	mutex_unlock(&rcu_barrier_mutex);
	wait_migrated_callbacks();
}

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 */
void rcu_barrier(void)
{
	_rcu_barrier(RCU_BARRIER_STD);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
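
/*
 * Usage sketch (illustration only, not part of this file): a module that
 * queues call_rcu() callbacks must flush them before unloading, because the
 * callback function itself lives in module text.  "struct my_obj",
 * my_free_rcu() and my_module_exit() are hypothetical, and kfree() assumes
 * <linux/slab.h>.
 */
struct my_obj {
	struct rcu_head rcu;
	/* ... payload ... */
};

static void my_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct my_obj, rcu));
}

static void my_module_exit(void)
{
	/* First ensure no new call_rcu(&obj->rcu, my_free_rcu) is posted. */
	rcu_barrier();	/* then wait for every pending my_free_rcu() to run */
	/* only now is it safe for my_free_rcu()'s code to disappear */
}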

/**
 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
 */
void rcu_barrier_bh(void)
{
	_rcu_barrier(RCU_BARRIER_BH);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);

/**
 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
 */
void rcu_barrier_sched(void)
{
	_rcu_barrier(RCU_BARRIER_SCHED);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);

static void rcu_migrate_callback(struct rcu_head *notused)
{
	if (atomic_dec_and_test(&rcu_migrate_type_count))
		wake_up(&rcu_migrate_wq);
}

extern int rcu_cpu_notify(struct notifier_block *self,
			  unsigned long action, void *hcpu);

static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
		unsigned long action, void *hcpu)
{
	rcu_cpu_notify(self, action, hcpu);
	if (action == CPU_DYING) {
		/*
		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
		 * returns, all online cpus have queued rcu_barrier_func(),
		 * and the dead cpu (if it exists) queues rcu_migrate_callback()s.
		 *
		 * These callbacks ensure _rcu_barrier() waits for all
		 * RCU callbacks of the specified type to complete.
		 */
		atomic_set(&rcu_migrate_type_count, 3);
		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
	} else if (action == CPU_DOWN_PREPARE) {
		/* Don't need to wait until next removal operation. */
		/* rcu_migrate_head is protected by cpu_add_remove_lock */
		wait_migrated_callbacks();
	}

	return NOTIFY_OK;
}

void __init rcu_init(void)
{
	int i;

	__rcu_init();
	cpu_notifier(rcu_barrier_cpu_hotplug, 0);

	/*
	 * We don't need protection against CPU-hotplug here because
	 * this is called early in boot, before either interrupts
	 * or the scheduler are operational.
	 */
	for_each_online_cpu(i)
		rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
}

void rcu_scheduler_starting(void)
{
	WARN_ON(num_online_cpus() != 1);
	WARN_ON(nr_context_switches() > 0);
	rcu_scheduler_active = 1;
}