kernel/time/tick-broadcast.c

// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check if the device can be utilized as a broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
                                        struct clock_event_device *newdev)
{
        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if (!tick_check_broadcast_device(cur, dev))
                return;

        if (!try_module_get(dev->owner))
                return;

        clockevents_exchange_device(cur, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot-capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_clock_notify();
}

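/*
 * Illustrative sketch (editorial note, not part of this file): timer
 * drivers do not call tick_install_broadcast_device() directly. They
 * register a clock_event_device with the clockevents core, which
 * forwards suitable devices here via tick_check_new_device(). A global
 * timer that keeps running in deep C-states (i.e. without
 * CLOCK_EVT_FEAT_C3STOP) could look roughly like:
 *
 *      static struct clock_event_device foo_timer = {
 *              .name           = "foo-timer",
 *              .features       = CLOCK_EVT_FEAT_PERIODIC |
 *                                CLOCK_EVT_FEAT_ONESHOT,
 *              .rating         = 300,
 *              .set_next_event = foo_timer_set_next_event,
 *      };
 *
 *      clockevents_config_and_register(&foo_timer, rate, 0xf, 0x7fffffff);
 *
 * The "foo" names are hypothetical; the registration call and the
 * feature flags are the real clockevents API.
 */
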
/*
 * Check if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
        int ret = -ENODEV;

        if (tick_is_broadcast_device(dev)) {
                raw_spin_lock(&tick_broadcast_lock);
                ret = __clockevents_update_freq(dev, freq);
                raw_spin_unlock(&tick_broadcast_lock);
        }
        return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

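/*
 * Editorial note (hedged): the ->broadcast method is the arch hook that
 * actually kicks the CPUs in the mask, typically by sending an IPI; on
 * x86, for instance, the local APIC clockevent installs such a
 * function. A sketch of what one looks like:
 *
 *      static void foo_timer_broadcast(const struct cpumask *mask)
 *      {
 *              arch_send_timer_broadcast_ipi(mask);    // hypothetical helper
 *      }
 *
 * The generic tick_broadcast default assigned above may be unavailable
 * in some configurations, which is why the NULL re-check follows.
 */
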
/*
 * Check if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
        int ret = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
                ret = 1;
        } else {
                /*
                 * Clear the broadcast bit for this cpu if the
                 * device is not power state affected.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                else
                        tick_device_setup_broadcast_func(dev);

                /*
                 * Clear the broadcast bit if the CPU is not in
                 * periodic broadcast on state.
                 */
                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_ONESHOT:
                        /*
                         * If the system is in oneshot mode we can
                         * unconditionally clear the oneshot mask bit,
                         * because the CPU is running and therefore
                         * not in an idle state which causes the power
                         * state affected device to stop. Let the
                         * caller initialize the device.
                         */
                        tick_broadcast_clear_oneshot(cpu);
                        ret = 0;
                        break;

                case TICKDEV_MODE_PERIODIC:
                        /*
                         * If the system is in periodic mode, check
                         * whether the broadcast device can be
                         * switched off now.
                         */
                        if (cpumask_empty(tick_broadcast_mask) && bc)
                                clockevents_shutdown(bc);
                        /*
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
                         * is delivered by the broadcast device, if
                         * the broadcast device exists and is not
                         * hrtimer based.
                         */
                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                                ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}
#endif

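/*
 * Illustrative sketch (editorial note, not part of this file):
 * tick_receive_broadcast() is meant to be called from the arch's
 * timer-broadcast IPI path, e.g. the IPI_TIMER case of ARM's
 * handle_IPI(). Roughly:
 *
 *      void handle_timer_broadcast_ipi(void)   // hypothetical name
 *      {
 *              tick_receive_broadcast();
 *      }
 *
 * It runs the CPU-local event handler so the woken CPU processes the
 * tick it would have missed while its own timer was stopped.
 */
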
/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;
        bool local = false;

        /*
         * Check if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                struct clock_event_device *bc = tick_broadcast_device.evtdev;

                cpumask_clear_cpu(cpu, mask);
                /*
                 * We only run the local handler if the broadcast
                 * device is not hrtimer based. Otherwise we run into
                 * a hrtimer recursion.
                 *
                 * local timer_interrupt()
                 *   local_handler()
                 *     expire_hrtimers()
                 *       bc_handler()
                 *         local_handler()
                 *           expire_hrtimers()
                 */
                local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic)
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
        return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        bool bc_local;

        raw_spin_lock(&tick_broadcast_lock);

        /* Handle spurious interrupts gracefully */
        if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
                raw_spin_unlock(&tick_broadcast_lock);
                return;
        }

        bc_local = tick_do_periodic_broadcast();

        if (clockevent_state_oneshot(dev)) {
                ktime_t next = ktime_add(dev->next_event, tick_period);

                clockevents_program_event(dev, next, true);
        }
        raw_spin_unlock(&tick_broadcast_lock);

        /*
         * We run the handler of the local cpu after dropping
         * tick_broadcast_lock because the handler might deadlock when
         * trying to switch to oneshot mode.
         */
        if (bc_local)
                td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:       The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        int cpu, bc_stopped;
        unsigned long flags;

        /* Protects also the local clockevent device. */
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;

        /*
         * Is the device not affected by the powerstate?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        cpu = smp_processor_id();
        bc = tick_broadcast_device.evtdev;
        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (mode) {
        case TICK_BROADCAST_FORCE:
                tick_broadcast_forced = 1;
                /* fall through */
        case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        /*
                         * Only shutdown the cpu local device, if:
                         *
                         * - the broadcast device exists
                         * - the broadcast device is not a hrtimer based one
                         * - the broadcast device is in periodic mode to
                         *   avoid a hiccup during switch to oneshot mode
                         */
                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
                            tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                break;

        case TICK_BROADCAST_OFF:
                if (tick_broadcast_forced)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (bc) {
                if (cpumask_empty(tick_broadcast_mask)) {
                        if (!bc_stopped)
                                clockevents_shutdown(bc);
                } else if (bc_stopped) {
                        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                                tick_broadcast_start_periodic(bc);
                        else
                                tick_broadcast_setup_oneshot(bc);
                }
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

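/*
 * Illustrative sketch (editorial note, hedged): callers normally use
 * the wrappers from <linux/tick.h> rather than calling this function
 * directly, e.g. on hardware whose local timer stops in deep C-states:
 *
 *      tick_broadcast_enable();        // TICK_BROADCAST_ON, this CPU
 *      ...
 *      tick_broadcast_disable();       // TICK_BROADCAST_OFF, this CPU
 *
 * tick_broadcast_enable()/disable() map onto tick_broadcast_control()
 * with the corresponding mode in current kernels; older trees used
 * different wrapper names.
 */
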
/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int cpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_broadcast_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_on);

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif

void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases can the broadcast device mode change, and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
                return false;
        else
                return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_tick_resume(bc);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

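/*
 * Illustrative note (editorial, hedged): the idle loop uses this check
 * to fall back to polling instead of entering a deep C-state when the
 * broadcast IPI is already on its way, roughly:
 *
 *      if (cpu_idle_force_poll || tick_check_broadcast_expired())
 *              cpu_idle_poll();
 *      else
 *              cpuidle_idle_call();
 *
 * This is a simplification of do_idle() in kernel/sched/idle.c.
 */
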
/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                     ktime_t expires)
{
        if (!clockevent_state_oneshot(bc))
                clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

        clockevents_program_event(bc, expires, 1);
        tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                /*
                 * We might be in the middle of switching over from
                 * periodic to oneshot. If the CPU has not yet
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
                        clockevents_switch_state(td->evtdev,
                                                 CLOCK_EVT_STATE_ONESHOT);
                }
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;
        bool bc_local;

        raw_spin_lock(&tick_broadcast_lock);
        dev->next_event = KTIME_MAX;
        next_event = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                /*
                 * Required for !SMP because for_each_cpu() unconditionally
                 * reports CPU0 as set on UP kernels.
                 */
                if (!IS_ENABLED(CONFIG_SMP) &&
                    cpumask_empty(tick_broadcast_oneshot_mask))
                        break;

                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event <= now) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event < next_event) {
                        next_event = td->evtdev->next_event;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast()!
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Sanity check. Catch the case where we try to broadcast to
         * offline cpus.
         */
        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
                cpumask_and(tmpmask, tmpmask, cpu_online_mask);

        /*
         * Wakeup the cpus which have an expired event.
         */
        bc_local = tick_do_broadcast(tmpmask);

        /*
         * Two reasons for reprogram:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event != KTIME_MAX)
                tick_broadcast_set_event(dev, next_cpu, next_event);

        raw_spin_unlock(&tick_broadcast_lock);

        if (bc_local) {
                td = this_cpu_ptr(&tick_cpu_device);
                td->evtdev->event_handler(td->evtdev);
        }
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

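/*
 * Editorial note (hedged): CLOCK_EVT_FEAT_HRTIMER marks the pseudo
 * broadcast device that is emulated with a high resolution timer on one
 * of the CPUs (see kernel/time/tick-broadcast-hrtimer.c). Because some
 * CPU has to stay sufficiently awake to run that hrtimer, the CPU the
 * timer is bound to (bc->bound_on) gets -EBUSY here and is thereby
 * refused deep idle.
 */
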
static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shutdown the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event < bc->next_event)
                        return;
        }
        clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct clock_event_device *bc, *dev;
        int cpu, ret = 0;
        ktime_t now;

        /*
         * If there is no broadcast device, tell the caller not to go
         * into deep idle.
         */
        if (!tick_broadcast_device.evtdev)
                return -EBUSY;

        dev = this_cpu_ptr(&tick_cpu_device)->evtdev;

        raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;
        cpu = smp_processor_id();

        if (state == TICK_BROADCAST_ENTER) {
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we do not add
                 * the CPU to the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shutdown.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        goto out;

                /*
                 * If the broadcast device is in periodic mode, we
                 * return.
                 */
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                        /* If it is a hrtimer based broadcast, return busy */
                        if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
                                ret = -EBUSY;
                        goto out;
                }

                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

                        /* Conditionally shut down the local timer. */
                        broadcast_shutdown_local(bc, dev);

                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourselves in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away; we return
                         * busy, so the CPU does not try to go deep
                         * idle.
                         */
                        if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
                                ret = -EBUSY;
                        } else if (dev->next_event < bc->next_event) {
                                tick_broadcast_set_event(bc, cpu, dev->next_event);
                                /*
                                 * In case of hrtimer broadcasts the
                                 * programming might have moved the
                                 * timer to this cpu. If yes, remove
                                 * us from the broadcast mask and
                                 * return busy.
                                 */
                                ret = broadcast_needs_cpu(bc, cpu);
                                if (ret) {
                                        cpumask_clear_cpu(cpu,
                                                tick_broadcast_oneshot_mask);
                                }
                        }
                }
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                                       tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event <= now) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock(&tick_broadcast_lock);
        return ret;
}

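/*
 * Illustrative sketch (editorial note, not part of this file): idle
 * code reaches this function via the tick_broadcast_enter() and
 * tick_broadcast_exit() wrappers from <linux/tick.h>. cpuidle uses
 * them around states flagged with CPUIDLE_FLAG_TIMER_STOP, roughly:
 *
 *      if (broadcast && tick_broadcast_enter()) {
 *              // -EBUSY: this CPU must not stop its timer now;
 *              // fall back to a shallower idle state instead
 *      }
 *      ... enter the idle state ...
 *      if (broadcast)
 *              tick_broadcast_exit();
 *
 * This is a simplification of cpuidle_enter_state() in
 * drivers/cpuidle/cpuidle.c.
 */
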
/*
 * Reset the oneshot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        int cpu = smp_processor_id();

        if (!bc)
                return;

        /* Set it up only once! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = clockevent_state_periodic(bc);

                bc->event_handler = tick_handle_oneshot_broadcast;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(tmpmask, tick_broadcast_mask);
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask,
                           tick_broadcast_oneshot_mask, tmpmask);

                if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period);
                } else
                        bc->next_event = KTIME_MAX;
        } else {
                /*
                 * The first cpu which switches to oneshot mode sets
                 * the bit for all other cpus which are in the general
                 * (periodic) broadcast mask. So the bit is set and
                 * would prevent the first broadcast enter after this
                 * from programming the bc device.
                 */
                tick_broadcast_clear_oneshot(cpu);
        }
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        bc = tick_broadcast_device.evtdev;

        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
                /* This moves the broadcast assignment to this CPU: */
                clockevents_program_event(bc, bc->next_event, 1);
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int cpu)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Clear the broadcast masks for the dead cpu, but do not stop
         * the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif

/*
 * Check whether the broadcast device is in oneshot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return -EBUSY;

        return 0;
}
#endif

void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}