Commit | Line | Data |
---|---|---|
457c8996 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
4eacdf18 | 2 | /* |
08ab707d FW |
3 | * Context tracking: Probe on high level context boundaries such as kernel, |
4 | * userspace, guest or idle. | |
4eacdf18 FW |
5 | * |
6 | * This is used by RCU to remove its dependency on the timer tick while a CPU | |
08ab707d | 7 | * runs in idle, userspace or guest mode. |
4eacdf18 | 8 | * |
08ab707d | 9 | * User/guest tracking started by Frederic Weisbecker: |
4eacdf18 | 10 | * |
08ab707d | 11 | * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker |
4eacdf18 FW |
12 | * |
13 | * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton, | |
14 | * Steven Rostedt, Peter Zijlstra for suggestions and improvements. | |
15 | * | |
08ab707d FW |
16 | * RCU extended quiescent state bits imported from kernel/rcu/tree.c |
17 | * where the relevant authorship may be found. | |
4eacdf18 FW |
18 | */ |
19 | ||
91d1aa43 FW |
20 | #include <linux/context_tracking.h> |
21 | #include <linux/rcupdate.h> | |
22 | #include <linux/sched.h> | |
91d1aa43 | 23 | #include <linux/hardirq.h> |
6a61671b | 24 | #include <linux/export.h> |
4cdf77a8 | 25 | #include <linux/kprobes.h> |
17211455 | 26 | #include <trace/events/rcu.h> |
91d1aa43 | 27 | |
e67198cc | 28 | |
62e2412d FW |
29 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { /* Initial per-CPU context tracking state. */ |
30 | #ifdef CONFIG_CONTEXT_TRACKING_IDLE | |
904e600e | 31 | .dynticks_nesting = 1, /* ct_kernel_exit() enters an EQS only when this count is exactly 1. */ |
95e04f48 | 32 | .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, /* The value ct_kernel_exit() expects to find on entry. */ |
62e2412d | 33 | #endif |
17147677 | 34 | .state = ATOMIC_INIT(RCU_DYNTICKS_IDX), /* RCU_DYNTICKS_IDX bit set: ct_kernel_enter_state() expects this bit once RCU is watching. */ |
62e2412d FW |
35 | }; |
36 | EXPORT_SYMBOL_GPL(context_tracking); | |
37 | ||
e67198cc | 38 | #ifdef CONFIG_CONTEXT_TRACKING_IDLE |
17211455 FW |
39 | #define TPS(x) tracepoint_string(x) /* Shorthand used for the string arguments of trace_rcu_dyntick() below. */ |
40 | ||
41 | /* Record the current task on dyntick-idle entry: store this CPU's number in current->rcu_tasks_idle_cpu. */ | |
42 | static __always_inline void rcu_dynticks_task_enter(void) | |
43 | { /* Empty body unless CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL are both defined. */ | |
44 | #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) | |
45 | WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); | |
46 | #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ | |
47 | } | |
48 | ||
49 | /* Record no current task on dyntick-idle exit: reset current->rcu_tasks_idle_cpu to -1, undoing rcu_dynticks_task_enter(). */ | |
50 | static __always_inline void rcu_dynticks_task_exit(void) | |
51 | { /* Empty body unless CONFIG_TASKS_RCU and CONFIG_NO_HZ_FULL are both defined. */ | |
52 | #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) | |
53 | WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); | |
54 | #endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ | |
55 | } | |
56 | ||
57 | /* Turn on heavyweight RCU tasks trace readers on idle/user entry by setting the current task's need_mb flag. */ | |
58 | static __always_inline void rcu_dynticks_task_trace_enter(void) | |
59 | { | |
60 | #ifdef CONFIG_TASKS_TRACE_RCU | |
61 | if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) /* Flag is only touched when this option is enabled. */ | |
62 | current->trc_reader_special.b.need_mb = true; | |
63 | #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ | |
64 | } | |
65 | ||
66 | /* Turn off heavyweight RCU tasks trace readers on idle/user exit by clearing the current task's need_mb flag. */ | |
67 | static __always_inline void rcu_dynticks_task_trace_exit(void) | |
68 | { | |
69 | #ifdef CONFIG_TASKS_TRACE_RCU | |
70 | if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) /* Flag is only touched when this option is enabled. */ | |
71 | current->trc_reader_special.b.need_mb = false; | |
72 | #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ | |
73 | } | |
74 | ||
75 | /* | |
76 | * Record entry into an extended quiescent state. This is only to be | |
77 | * called when not already in an extended quiescent state, that is, | |
78 | * RCU is watching prior to the call to this function and is no longer | |
79 | * watching upon return. @offset is handed unchanged to ct_state_inc(). | |
80 | */ | |
17147677 | 81 | static noinstr void ct_kernel_exit_state(int offset) |
17211455 FW |
82 | { |
83 | int seq; | |
84 | ||
85 | /* | |
86 | * CPUs seeing atomic_add_return() must see prior RCU read-side | |
87 | * critical sections, and we also must force ordering with the | |
88 | * next idle sojourn. | |
89 | */ | |
90 | rcu_dynticks_task_trace_enter(); // Before the ct->state update! | |
17147677 | 91 | seq = ct_state_inc(offset); |
17211455 | 92 | // RCU is no longer watching. Better be in extended quiescent state, i.e. RCU_DYNTICKS_IDX must now be clear in the new value! |
17147677 | 93 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX)); |
17211455 FW |
94 | } |
95 | ||
96 | /* | |
97 | * Record exit from an extended quiescent state. This is only to be | |
98 | * called from an extended quiescent state, that is, RCU is not watching | |
99 | * prior to the call to this function and is watching upon return. @offset is handed unchanged to ct_state_inc(). | |
100 | */ | |
17147677 | 101 | static noinstr void ct_kernel_enter_state(int offset) |
17211455 FW |
102 | { |
103 | int seq; | |
104 | ||
105 | /* | |
106 | * CPUs seeing atomic_add_return() must see prior idle sojourns, | |
107 | * and we also must force ordering with the next RCU read-side | |
108 | * critical section. | |
109 | */ | |
17147677 | 110 | seq = ct_state_inc(offset); |
17211455 FW |
111 | // RCU is now watching. Better not be in an extended quiescent state, i.e. RCU_DYNTICKS_IDX must now be set in the new value! |
112 | rcu_dynticks_task_trace_exit(); // After the ct->state update! | |
17147677 | 113 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX)); |
17211455 FW |
114 | } |
115 | ||
116 | /* | |
117 | * Enter an RCU extended quiescent state, which can be either the | |
118 | * idle loop or adaptive-tickless usermode execution. | |
119 | * @user is true on the usermode path and only feeds the CONFIG_RCU_EQS_DEBUG check that other callers are the idle task; @offset is passed on to ct_kernel_exit_state(). | |
120 | * We crowbar the ->dynticks_nmi_nesting field to zero to allow for | |
121 | * the possibility of usermode upcalls having messed up our count | |
122 | * of interrupt nesting level during the prior busy period. | |
123 | */ | |
17147677 | 124 | static void noinstr ct_kernel_exit(bool user, int offset) |
17211455 FW |
125 | { |
126 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); | |
127 | ||
128 | WARN_ON_ONCE(ct_dynticks_nmi_nesting() != DYNTICK_IRQ_NONIDLE); | |
129 | WRITE_ONCE(ct->dynticks_nmi_nesting, 0); | |
130 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | |
131 | ct_dynticks_nesting() == 0); | |
132 | if (ct_dynticks_nesting() != 1) { | |
133 | // RCU will still be watching, so just do accounting and leave. | |
134 | ct->dynticks_nesting--; | |
135 | return; | |
136 | } | |
137 | ||
138 | instrumentation_begin(); | |
139 | lockdep_assert_irqs_disabled(); | |
140 | trace_rcu_dyntick(TPS("Start"), ct_dynticks_nesting(), 0, ct_dynticks()); | |
141 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); /* A non-user caller must be the idle task. */ | |
142 | rcu_preempt_deferred_qs(current); | |
143 | ||
17147677 FW |
144 | // instrumentation for the noinstr ct_kernel_exit_state(), which is called below after instrumentation_end() |
145 | instrument_atomic_write(&ct->state, sizeof(ct->state)); | |
17211455 FW |
146 | |
147 | instrumentation_end(); | |
148 | WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */ | |
149 | // RCU is watching here ... | |
17147677 | 150 | ct_kernel_exit_state(offset); |
17211455 FW |
151 | // ... but is no longer watching here. |
152 | rcu_dynticks_task_enter(); | |
153 | } | |
154 | ||
155 | /* | |
156 | * Exit an RCU extended quiescent state, which can be either the | |
157 | * idle loop or adaptive-tickless usermode execution. | |
158 | * @user is true on the usermode path and only feeds the CONFIG_RCU_EQS_DEBUG check that other callers are the idle task; @offset is passed on to ct_kernel_enter_state(). | |
159 | * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to | |
160 | * allow for the possibility of usermode upcalls messing up our count of | |
161 | * interrupt nesting level during the busy period that is just now starting. | |
162 | */ | |
17147677 | 163 | static void noinstr ct_kernel_enter(bool user, int offset) |
17211455 FW |
164 | { |
165 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); | |
166 | long oldval; | |
167 | ||
168 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled()); | |
169 | oldval = ct_dynticks_nesting(); | |
170 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); | |
171 | if (oldval) { | |
172 | // RCU was already watching, so just do accounting and leave. | |
173 | ct->dynticks_nesting++; | |
174 | return; | |
175 | } | |
176 | rcu_dynticks_task_exit(); | |
177 | // RCU is not watching here ... | |
17147677 | 178 | ct_kernel_enter_state(offset); |
17211455 FW |
179 | // ... but is watching here. |
180 | instrumentation_begin(); | |
181 | ||
17147677 FW |
182 | // instrumentation for the noinstr ct_kernel_enter_state(), which was called above before instrumentation_begin() |
183 | instrument_atomic_write(&ct->state, sizeof(ct->state)); | |
17211455 FW |
184 | |
185 | trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks()); | |
186 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); /* A non-user caller must be the idle task. */ | |
187 | WRITE_ONCE(ct->dynticks_nesting, 1); | |
188 | WARN_ON_ONCE(ct_dynticks_nmi_nesting()); | |
189 | WRITE_ONCE(ct->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); | |
190 | instrumentation_end(); | |
191 | } | |
192 | ||
193 | /** |
c33ef43a | 194 | * ct_nmi_exit - inform RCU of exit from NMI context |
17211455 FW |
195 | * |
196 | * If we are returning from the outermost NMI handler that interrupted an | |
17147677 | 197 | * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting |
17211455 FW |
198 | * to let the RCU grace-period handling know that the CPU is back to |
199 | * being RCU-idle. This function is also the implementation behind ct_irq_exit(). | |
200 | * | |
c33ef43a | 201 | * If you add or remove a call to ct_nmi_exit(), be sure to test |
17211455 FW |
202 | * with CONFIG_RCU_EQS_DEBUG=y. |
203 | */ | |
c33ef43a | 204 | void noinstr ct_nmi_exit(void) |
17211455 FW |
205 | { |
206 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); | |
207 | ||
208 | instrumentation_begin(); | |
209 | /* | |
210 | * Check for ->dynticks_nmi_nesting underflow and bad ct->state. | |
211 | * (We are exiting an NMI handler, so RCU better be paying attention | |
212 | * to us!) | |
213 | */ | |
214 | WARN_ON_ONCE(ct_dynticks_nmi_nesting() <= 0); | |
215 | WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); | |
216 | ||
217 | /* | |
218 | * If the nesting level is not 1, the CPU wasn't RCU-idle, so | |
219 | * leave it in non-RCU-idle state and just drop the count by 2. | |
220 | */ | |
221 | if (ct_dynticks_nmi_nesting() != 1) { | |
222 | trace_rcu_dyntick(TPS("--="), ct_dynticks_nmi_nesting(), ct_dynticks_nmi_nesting() - 2, | |
223 | ct_dynticks()); | |
224 | WRITE_ONCE(ct->dynticks_nmi_nesting, /* No store tearing. */ | |
225 | ct_dynticks_nmi_nesting() - 2); | |
226 | instrumentation_end(); | |
227 | return; | |
228 | } | |
229 | ||
230 | /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ | |
231 | trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks()); | |
232 | WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ | |
233 | ||
17147677 FW |
234 | // instrumentation for the noinstr ct_kernel_exit_state(), which is called below after instrumentation_end() |
235 | instrument_atomic_write(&ct->state, sizeof(ct->state)); | |
17211455 FW |
236 | instrumentation_end(); |
237 | ||
238 | // RCU is watching here ... | |
17147677 | 239 | ct_kernel_exit_state(RCU_DYNTICKS_IDX); |
17211455 FW |
240 | // ... but is no longer watching here. |
241 | ||
242 | if (!in_nmi()) /* Not a real NMI, e.g. reached through ct_irq_exit(). */ | |
243 | rcu_dynticks_task_enter(); | |
244 | } | |
245 | ||
246 | /** |
c33ef43a | 247 | * ct_nmi_enter - inform RCU of entry to NMI context |
17211455 | 248 | * |
17147677 | 249 | * If the CPU was idle from RCU's viewpoint, update ct->state and |
17211455 FW |
250 | * ct->dynticks_nmi_nesting to let the RCU grace-period handling know |
251 | * that the CPU is active. This implementation permits nested NMIs, as | |
252 | * long as the nesting level does not overflow an int. (You will probably | |
253 | * run out of stack space first.) This function is also the implementation behind ct_irq_enter(). | |
254 | * | |
c33ef43a | 255 | * If you add or remove a call to ct_nmi_enter(), be sure to test |
17211455 FW |
256 | * with CONFIG_RCU_EQS_DEBUG=y. |
257 | */ | |
c33ef43a | 258 | void noinstr ct_nmi_enter(void) |
17211455 FW |
259 | { |
260 | long incby = 2; | |
261 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); | |
262 | ||
263 | /* Complain about underflow. */ | |
264 | WARN_ON_ONCE(ct_dynticks_nmi_nesting() < 0); | |
265 | ||
266 | /* | |
267 | * If idle from RCU viewpoint, atomically update ct->state | |
268 | * to mark non-idle and increment ->dynticks_nmi_nesting by one. | |
269 | * Otherwise, increment ->dynticks_nmi_nesting by two. This means | |
270 | * if ->dynticks_nmi_nesting is equal to one, we are guaranteed | |
271 | * to be in the outermost NMI handler that interrupted an RCU-idle | |
272 | * period (observation due to Andy Lutomirski). | |
273 | */ | |
274 | if (rcu_dynticks_curr_cpu_in_eqs()) { | |
275 | ||
276 | if (!in_nmi()) /* Not a real NMI, e.g. reached through ct_irq_enter(). */ | |
277 | rcu_dynticks_task_exit(); | |
278 | ||
279 | // RCU is not watching here ... | |
17147677 | 280 | ct_kernel_enter_state(RCU_DYNTICKS_IDX); |
17211455 FW |
281 | // ... but is watching here. |
282 | ||
283 | instrumentation_begin(); | |
284 | // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() | |
17147677 FW |
285 | instrument_atomic_read(&ct->state, sizeof(ct->state)); |
286 | // instrumentation for the noinstr ct_kernel_enter_state() | |
287 | instrument_atomic_write(&ct->state, sizeof(ct->state)); | |
17211455 FW |
288 | |
289 | incby = 1; | |
290 | } else if (!in_nmi()) { | |
291 | instrumentation_begin(); | |
292 | rcu_irq_enter_check_tick(); | |
293 | } else { | |
294 | instrumentation_begin(); /* Every branch opens the section that instrumentation_end() closes below. */ | |
295 | } | |
296 | ||
297 | trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), | |
298 | ct_dynticks_nmi_nesting(), | |
299 | ct_dynticks_nmi_nesting() + incby, ct_dynticks()); | |
300 | instrumentation_end(); | |
301 | WRITE_ONCE(ct->dynticks_nmi_nesting, /* Prevent store tearing. */ | |
302 | ct_dynticks_nmi_nesting() + incby); | |
303 | barrier(); | |
304 | } | |
305 | ||
306 | /** |
c33ef43a | 307 | * ct_idle_enter - inform RCU that current CPU is entering idle |
17211455 FW |
308 | * |
309 | * Enter idle mode, in other words, -leave- the mode in which RCU | |
310 | * read-side critical sections can occur. (Though RCU read-side | |
311 | * critical sections can occur in irq handlers in idle, a possibility | |
312 | * handled by irq_enter() and irq_exit().) | |
313 | * | |
c33ef43a | 314 | * If you add or remove a call to ct_idle_enter(), be sure to test with |
17211455 FW |
315 | * CONFIG_RCU_EQS_DEBUG=y, which also makes this function warn when called with interrupts enabled. |
316 | */ | |
c33ef43a | 317 | void noinstr ct_idle_enter(void) |
17211455 FW |
318 | { |
319 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled()); | |
17147677 | 320 | ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE); |
17211455 | 321 | } |
c33ef43a | 322 | EXPORT_SYMBOL_GPL(ct_idle_enter); |
17211455 FW |
323 | |
324 | /** |
c33ef43a | 325 | * ct_idle_exit - inform RCU that current CPU is leaving idle |
17211455 FW |
326 | * |
327 | * Exit idle mode, in other words, -enter- the mode in which RCU | |
328 | * read-side critical sections can occur. Interrupts are disabled around the transition and their previous state is restored before returning. | |
329 | * | |
c33ef43a | 330 | * If you add or remove a call to ct_idle_exit(), be sure to test with |
17211455 FW |
331 | * CONFIG_RCU_EQS_DEBUG=y. |
332 | */ | |
c33ef43a | 333 | void noinstr ct_idle_exit(void) |
17211455 FW |
334 | { |
335 | unsigned long flags; | |
336 | ||
337 | raw_local_irq_save(flags); | |
17147677 | 338 | ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE); |
17211455 FW |
339 | raw_local_irq_restore(flags); |
340 | } | |
e67198cc | 341 | EXPORT_SYMBOL_GPL(ct_idle_exit); |
6f0e6c15 | 342 | |
3864caaf FW |
343 | /** |
344 | * ct_irq_enter - inform RCU that current CPU is entering irq away from idle | |
345 | * | |
346 | * Enter an interrupt handler, which might possibly result in exiting | |
347 | * idle mode, in other words, entering the mode in which read-side critical | |
348 | * sections can occur. The caller must have disabled interrupts. The bookkeeping itself is delegated to ct_nmi_enter(). | |
349 | * | |
350 | * Note that the Linux kernel is fully capable of entering an interrupt | |
351 | * handler that it never exits, for example when doing upcalls to user mode! | |
352 | * This code assumes that the idle loop never does upcalls to user mode. | |
353 | * If your architecture's idle loop does do upcalls to user mode (or does | |
354 | * anything else that results in unbalanced calls to the irq_enter() and | |
355 | * irq_exit() functions), RCU will give you what you deserve, good and hard. | |
356 | * But very infrequently and irreproducibly. | |
357 | * | |
358 | * Use things like work queues to work around this limitation. | |
359 | * | |
360 | * You have been warned. | |
361 | * | |
362 | * If you add or remove a call to ct_irq_enter(), be sure to test with | |
363 | * CONFIG_RCU_EQS_DEBUG=y. | |
364 | */ | |
6f0e6c15 FW |
365 | noinstr void ct_irq_enter(void) |
366 | { | |
3864caaf FW |
367 | lockdep_assert_irqs_disabled(); |
368 | ct_nmi_enter(); | |
6f0e6c15 FW |
369 | } |
370 | ||
3864caaf FW |
371 | /** |
372 | * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle | |
373 | * | |
374 | * Exit from an interrupt handler, which might possibly result in entering | |
375 | * idle mode, in other words, leaving the mode in which read-side critical | |
376 | * sections can occur. The caller must have disabled interrupts. The bookkeeping itself is delegated to ct_nmi_exit(). | |
377 | * | |
378 | * This code assumes that the idle loop never does anything that might | |
379 | * result in unbalanced calls to irq_enter() and irq_exit(). If your | |
380 | * architecture's idle loop violates this assumption, RCU will give you what | |
381 | * you deserve, good and hard. But very infrequently and irreproducibly. | |
382 | * | |
383 | * Use things like work queues to work around this limitation. | |
384 | * | |
385 | * You have been warned. | |
386 | * | |
387 | * If you add or remove a call to ct_irq_exit(), be sure to test with | |
388 | * CONFIG_RCU_EQS_DEBUG=y. | |
389 | */ | |
6f0e6c15 FW |
390 | noinstr void ct_irq_exit(void) |
391 | { | |
3864caaf FW |
392 | lockdep_assert_irqs_disabled(); |
393 | ct_nmi_exit(); | |
6f0e6c15 FW |
394 | } |
395 | ||
3864caaf FW |
396 | /* |
397 | * Wrapper for ct_irq_enter() for callers that may have interrupts enabled: local interrupts are saved and disabled around the call, then restored. | |
398 | * | |
399 | * If you add or remove a call to ct_irq_enter_irqson(), be sure to test | |
400 | * with CONFIG_RCU_EQS_DEBUG=y. | |
401 | */ | |
6f0e6c15 FW |
402 | void ct_irq_enter_irqson(void) |
403 | { | |
3864caaf FW |
404 | unsigned long flags; |
405 | ||
406 | local_irq_save(flags); | |
407 | ct_irq_enter(); | |
408 | local_irq_restore(flags); | |
6f0e6c15 FW |
409 | } |
410 | ||
3864caaf FW |
411 | /* |
412 | * Wrapper for ct_irq_exit() for callers that may have interrupts enabled: local interrupts are saved and disabled around the call, then restored. | |
413 | * | |
414 | * If you add or remove a call to ct_irq_exit_irqson(), be sure to test | |
415 | * with CONFIG_RCU_EQS_DEBUG=y. | |
416 | */ | |
6f0e6c15 FW |
417 | void ct_irq_exit_irqson(void) |
418 | { | |
3864caaf FW |
419 | unsigned long flags; |
420 | ||
421 | local_irq_save(flags); | |
422 | ct_irq_exit(); | |
423 | local_irq_restore(flags); | |
6f0e6c15 | 424 | } |
c33ef43a | 425 | #else |
17147677 FW |
426 | static __always_inline void ct_kernel_exit(bool user, int offset) { /* No-op stub for builds without CONFIG_CONTEXT_TRACKING_IDLE. */ } |
427 | static __always_inline void ct_kernel_enter(bool user, int offset) { /* No-op stub for builds without CONFIG_CONTEXT_TRACKING_IDLE. */ } | |
e67198cc FW |
428 | #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ |
429 | ||
24a9c541 FW |
430 | #ifdef CONFIG_CONTEXT_TRACKING_USER |
431 | ||
1b6a259a FW |
432 | #define CREATE_TRACE_POINTS |
433 | #include <trace/events/context_tracking.h> | |
434 | ||
74c57875 FW |
435 | DEFINE_STATIC_KEY_FALSE(context_tracking_key); /* Starts false; ct_cpu_track_user() increments it once per CPU it activates. */ |
436 | EXPORT_SYMBOL_GPL(context_tracking_key); | |
65f382fd | 437 | |
0372007f | 438 | static noinstr bool context_tracking_recursion_enter(void) /* Returns true only for the outermost caller on this CPU; callers must then pair it with context_tracking_recursion_exit(). */ |
aed5ed47 FW |
439 | { |
440 | int recursion; | |
441 | ||
442 | recursion = __this_cpu_inc_return(context_tracking.recursion); | |
443 | if (recursion == 1) /* First level: the caller owns the per-CPU recursion counter. */ | |
444 | return true; | |
445 | ||
446 | WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion); | |
447 | __this_cpu_dec(context_tracking.recursion); /* Nested (or invalid) entry: undo our increment and refuse. */ | |
448 | ||
449 | return false; | |
450 | } | |
451 | ||
0372007f | 452 | static __always_inline void context_tracking_recursion_exit(void) /* Drops the per-CPU recursion count taken by a successful context_tracking_recursion_enter(). */ |
aed5ed47 FW |
453 | { |
454 | __this_cpu_dec(context_tracking.recursion); | |
455 | } | |
456 | ||
4eacdf18 | 457 | /** |
0ffc781a FW |
458 | * __ct_user_enter - Inform the context tracking that the CPU is going |
459 | * to enter user or guest space mode. | |
4eacdf18 | 460 | * |
3b239b30 PM |
461 | * @state: userspace context-tracking state to enter. |
462 | * | |
4eacdf18 | 463 | * This function must be called right before we switch from the kernel |
3aab4f50 RR |
464 | * to user or guest space, when it's guaranteed the remaining kernel |
465 | * instructions to execute won't use any RCU read side critical section | |
466 | * because this function sets RCU in extended quiescent state. Interrupts must be disabled by the caller. | |
4eacdf18 | 467 | */ |
0ffc781a | 468 | void noinstr __ct_user_enter(enum ctx_state state) |
91d1aa43 | 469 | { |
17147677 | 470 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); |
56450649 FW |
471 | lockdep_assert_irqs_disabled(); |
472 | ||
4eacdf18 | 473 | /* Kernel threads aren't supposed to go to userspace */ |
91d1aa43 FW |
474 | WARN_ON_ONCE(!current->mm); |
475 | ||
aed5ed47 | 476 | if (!context_tracking_recursion_enter()) |
d0e536d8 | 477 | return; |
aed5ed47 | 478 | |
17147677 FW |
479 | if (__ct_state() != state) { /* Nothing to do when this CPU is already in @state. */ |
480 | if (ct->active) { | |
d65ec121 FW |
481 | /* |
482 | * At this stage, only low level arch entry code remains and | |
483 | * then we'll run in userspace. We can assume there won't be | |
484 | * any RCU read-side critical section until the next call to | |
6f0e6c15 | 485 | * user_exit() or ct_irq_enter(). Let's remove RCU's dependency |
d65ec121 FW |
486 | * on the tick. |
487 | */ | |
19fdd98b | 488 | if (state == CONTEXT_USER) { |
0372007f | 489 | instrumentation_begin(); |
19fdd98b RR |
490 | trace_user_enter(0); |
491 | vtime_user_enter(current); | |
0372007f | 492 | instrumentation_end(); |
19fdd98b | 493 | } |
56450649 FW |
494 | /* |
495 | * Other than generic entry implementation, we may be past the last | |
496 | * rescheduling opportunity in the entry code. Trigger a self IPI | |
497 | * that will fire and reschedule once we resume in user/guest mode. | |
498 | */ | |
499 | rcu_irq_work_resched(); | |
17147677 | 500 | |
c33ef43a FW |
501 | /* |
502 | * Enter RCU idle mode right before resuming userspace. No use of RCU | |
503 | * is permitted between this call and the matching ct_kernel_enter(). This way the | |
504 | * CPU doesn't need to maintain the tick for RCU maintenance purposes | |
505 | * when the CPU runs in userspace. | |
506 | */ | |
17147677 FW |
507 | ct_kernel_exit(true, RCU_DYNTICKS_IDX + state); |
508 | ||
509 | /* | |
510 | * Special case if we only track user <-> kernel transitions for tickless | |
511 | * cputime accounting but we don't support RCU extended quiescent state. | |
512 | * In this case we don't care about any concurrency/ordering. | |
513 | */ | |
514 | if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) | |
0f613bfa | 515 | raw_atomic_set(&ct->state, state); |
17147677 FW |
516 | } else { |
517 | /* | |
518 | * Even if context tracking is disabled on this CPU, because it's outside | |
519 | * the full dynticks mask for example, we still have to keep track of the | |
520 | * context transitions and states to prevent inconsistency on those of | |
521 | * other CPUs. | |
522 | * If a task triggers an exception in userspace, sleep on the exception | |
523 | * handler and then migrate to another CPU, that new CPU must know where | |
524 | * the exception returns by the time we call exception_exit(). | |
525 | * This information can only be provided by the previous CPU when it called | |
526 | * exception_enter(). | |
527 | * OTOH we can spare the calls to vtime and RCU when context_tracking.active | |
528 | * is false because we know that CPU is not tickless. | |
529 | */ | |
530 | if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) { | |
531 | /* Tracking for vtime only, no concurrent RCU EQS accounting */ | |
0f613bfa | 532 | raw_atomic_set(&ct->state, state); |
17147677 FW |
533 | } else { |
534 | /* | |
535 | * Tracking for vtime and RCU EQS. Make sure we don't race | |
536 | * with NMIs. OTOH we don't care about ordering here since | |
537 | * RCU only requires RCU_DYNTICKS_IDX increments to be fully | |
538 | * ordered. | |
539 | */ | |
0f613bfa | 540 | raw_atomic_add(state, &ct->state); |
17147677 | 541 | } |
d65ec121 | 542 | } |
91d1aa43 | 543 | } |
aed5ed47 | 544 | context_tracking_recursion_exit(); |
d0e536d8 | 545 | } |
0ffc781a | 546 | EXPORT_SYMBOL_GPL(__ct_user_enter); |
d0e536d8 | 547 | |
f67671ba FW |
548 | /* |
549 | * OBSOLETE: | |
550 | * This function should be noinstr but the below local_irq_restore() is | |
551 | * unsafe because it involves illegal RCU uses through tracing and lockdep. | |
552 | * This is unlikely to be fixed as this function is obsolete. The preferred | |
553 | * way is to call __ct_user_enter() through user_enter_irqoff() | |
554 | * or context_tracking_guest_enter(). It should be the arch entry code | |
555 | * responsibility to call into context tracking with IRQs disabled. | |
556 | */ | |
fe98db1c | 557 | void ct_user_enter(enum ctx_state state) |
d0e536d8 PB |
558 | { |
559 | unsigned long flags; | |
560 | ||
561 | /* | |
562 | * Some contexts may involve an exception occurring in an irq, | |
563 | * leading to this nesting: | |
c33ef43a | 564 | * ct_irq_enter() ct_kernel_enter(true, ...) ct_kernel_exit(true, ...) ct_irq_exit() |
d0e536d8 PB |
565 | * This would mess up the dynticks_nesting count though. And the ct_irq_*() |
566 | * helpers are enough to protect RCU uses inside the exception. So | |
567 | * just return immediately if we detect we are in an IRQ. | |
568 | */ | |
569 | if (in_interrupt()) | |
570 | return; | |
571 | ||
572 | local_irq_save(flags); | |
0ffc781a | 573 | __ct_user_enter(state); |
91d1aa43 FW |
574 | local_irq_restore(flags); |
575 | } | |
fe98db1c FW |
576 | NOKPROBE_SYMBOL(ct_user_enter); |
577 | EXPORT_SYMBOL_GPL(ct_user_enter); | |
3aab4f50 | 578 | |
f163f030 FW |
579 | /** |
580 | * user_enter_callable() - Unfortunate ASM callable version of user_enter() for | |
581 | * archs that didn't manage to check the context tracking | |
582 | * static key from low level code. It does nothing but call user_enter(). | |
583 | * | |
584 | * This OBSOLETE function should be noinstr but it unsafely calls | |
585 | * local_irq_restore(), involving illegal RCU uses through tracing and lockdep. | |
f67671ba FW |
586 | * This is unlikely to be fixed as this function is obsolete. The preferred |
587 | * way is to call user_enter_irqoff(). It should be the arch entry code | |
588 | * responsibility to call into context tracking with IRQs disabled. | |
589 | */ | |
f163f030 | 590 | void user_enter_callable(void) |
3aab4f50 | 591 | { |
f70cd6b0 | 592 | user_enter(); |
3aab4f50 | 593 | } |
f163f030 | 594 | NOKPROBE_SYMBOL(user_enter_callable); |
91d1aa43 | 595 | |
4eacdf18 | 596 | /** |
0ffc781a FW |
597 | * __ct_user_exit - Inform the context tracking that the CPU is |
598 | * exiting user or guest mode and entering the kernel. | |
4eacdf18 | 599 | * |
3b239b30 PM |
600 | * @state: userspace context-tracking state being exited from. |
601 | * | |
3aab4f50 RR |
602 | * This function must be called after we entered the kernel from user or |
603 | * guest space before any use of RCU read side critical section. This | |
604 | * potentially includes any high level kernel code like syscalls, exceptions, | |
605 | * signal handling, etc... | |
4eacdf18 FW |
606 | * |
607 | * This call supports re-entrancy. This way it can be called from any exception | |
608 | * handler without needing to know if we came from userspace or not. | |
609 | */ | |
0ffc781a | 610 | void noinstr __ct_user_exit(enum ctx_state state) |
91d1aa43 | 611 | { |
17147677 FW |
612 | struct context_tracking *ct = this_cpu_ptr(&context_tracking); |
613 | ||
aed5ed47 | 614 | if (!context_tracking_recursion_enter()) |
d0e536d8 | 615 | return; |
aed5ed47 | 616 | |
17147677 FW |
617 | if (__ct_state() == state) { /* Only act when this CPU is actually recorded as being in @state. */ |
618 | if (ct->active) { | |
d65ec121 | 619 | /* |
c33ef43a FW |
620 | * Exit RCU idle mode while entering the kernel because it can |
621 | * run a RCU read side critical section anytime. | |
d65ec121 | 622 | */ |
17147677 | 623 | ct_kernel_enter(true, RCU_DYNTICKS_IDX - state); |
19fdd98b | 624 | if (state == CONTEXT_USER) { |
0372007f | 625 | instrumentation_begin(); |
19fdd98b RR |
626 | vtime_user_exit(current); |
627 | trace_user_exit(0); | |
0372007f | 628 | instrumentation_end(); |
19fdd98b | 629 | } |
17147677 FW |
630 | |
631 | /* | |
632 | * Special case if we only track user <-> kernel transitions for tickless | |
633 | * cputime accounting but we don't support RCU extended quiescent state. | |
634 | * In this case we don't care about any concurrency/ordering. | |
635 | */ | |
636 | if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) | |
0f613bfa | 637 | raw_atomic_set(&ct->state, CONTEXT_KERNEL); |
17147677 FW |
638 | |
639 | } else { | |
640 | if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) { | |
641 | /* Tracking for vtime only, no concurrent RCU EQS accounting */ | |
0f613bfa | 642 | raw_atomic_set(&ct->state, CONTEXT_KERNEL); |
17147677 FW |
643 | } else { |
644 | /* | |
645 | * Tracking for vtime and RCU EQS. Make sure we don't race | |
646 | * with NMIs. OTOH we don't care about ordering here since | |
647 | * RCU only requires RCU_DYNTICKS_IDX increments to be fully | |
648 | * ordered. | |
649 | */ | |
0f613bfa | 650 | raw_atomic_sub(state, &ct->state); |
17147677 | 651 | } |
d65ec121 | 652 | } |
91d1aa43 | 653 | } |
aed5ed47 | 654 | context_tracking_recursion_exit(); |
d0e536d8 | 655 | } |
0ffc781a | 656 | EXPORT_SYMBOL_GPL(__ct_user_exit); |
d0e536d8 | 657 | |
f67671ba FW |
658 | /* |
659 | * OBSOLETE: | |
660 | * This function should be noinstr but the below local_irq_save() is | |
661 | * unsafe because it involves illegal RCU uses through tracing and lockdep. | |
662 | * This is unlikely to be fixed as this function is obsolete. The preferred | |
663 | * way is to call __ct_user_exit() through user_exit_irqoff() | |
664 | * or context_tracking_guest_exit(). It should be the arch entry code | |
665 | * responsibility to call into context tracking with IRQs disabled. | |
666 | */ | |
fe98db1c | 667 | void ct_user_exit(enum ctx_state state) |
d0e536d8 PB |
668 | { |
669 | unsigned long flags; | |
670 | ||
671 | if (in_interrupt()) /* Same early return as in ct_user_enter(): nothing is done when called from interrupt context. */ | |
672 | return; | |
673 | ||
674 | local_irq_save(flags); | |
0ffc781a | 675 | __ct_user_exit(state); |
91d1aa43 FW |
676 | local_irq_restore(flags); |
677 | } | |
fe98db1c FW |
678 | NOKPROBE_SYMBOL(ct_user_exit); |
679 | EXPORT_SYMBOL_GPL(ct_user_exit); | |
3aab4f50 | 680 | |
f163f030 FW |
681 | /** |
682 | * user_exit_callable() - Unfortunate ASM callable version of user_exit() for | |
683 | * archs that didn't manage to check the context tracking | |
684 | * static key from low level code. It does nothing but call user_exit(). | |
685 | * | |
686 | * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(), | |
f67671ba FW |
687 | * involving illegal RCU uses through tracing and lockdep. This is unlikely |
688 | * to be fixed as this function is obsolete. The preferred way is to call | |
689 | * user_exit_irqoff(). It should be the arch entry code responsibility to | |
690 | * call into context tracking with IRQs disabled. | |
691 | */ | |
f163f030 | 692 | void user_exit_callable(void) |
3aab4f50 | 693 | { |
f70cd6b0 | 694 | user_exit(); |
3aab4f50 | 695 | } |
f163f030 | 696 | NOKPROBE_SYMBOL(user_exit_callable); |
91d1aa43 | 697 | |
2a0aafce | 698 | void __init ct_cpu_track_user(int cpu) /* Mark @cpu as actively context-tracked; the first call also performs the one-time setup further down. */ |
91d1aa43 | 699 | { |
fafe870f FW |
700 | static __initdata bool initialized = false; |
701 | ||
702 | if (!per_cpu(context_tracking.active, cpu)) { | |
703 | per_cpu(context_tracking.active, cpu) = true; | |
74c57875 | 704 | static_branch_inc(&context_tracking_key); /* One increment per newly activated CPU. */ |
fafe870f FW |
705 | } |
706 | ||
707 | if (initialized) /* Everything below runs only on the first call. */ | |
708 | return; | |
709 | ||
490f561b | 710 | #ifdef CONFIG_HAVE_TIF_NOHZ |
fafe870f FW |
711 | /* |
712 | * Set TIF_NOHZ on init/0 and let it propagate to all tasks through fork. | |
713 | * This assumes that init is the only task at this early boot stage. | |
714 | */ | |
715 | set_tsk_thread_flag(&init_task, TIF_NOHZ); | |
490f561b | 716 | #endif |
fafe870f FW |
717 | WARN_ON_ONCE(!tasklist_empty()); |
718 | ||
719 | initialized = true; | |
91d1aa43 | 720 | } |
65f382fd | 721 | |
24a9c541 | 722 | #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE |
65f382fd FW |
723 | void __init context_tracking_init(void) /* Only built with CONFIG_CONTEXT_TRACKING_USER_FORCE: enables user context tracking on every possible CPU. */ |
724 | { | |
725 | int cpu; | |
726 | ||
727 | for_each_possible_cpu(cpu) | |
2a0aafce | 728 | ct_cpu_track_user(cpu); |
65f382fd FW |
729 | } |
730 | #endif | |
24a9c541 FW |
731 | |
732 | #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */ |