watchdog/sparc64: define HARDLOCKUP_DETECTOR_SPARC64
kernel/watchdog.c (linux-2.6-block.git)

// SPDX-License-Identifier: GPL-2.0
/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "watchdog: " fmt

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/stop_machine.h>

#include <asm/irq_regs.h>
#include <linux/kvm_para.h>

static DEFINE_MUTEX(watchdog_mutex);

#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64)
# define WATCHDOG_HARDLOCKUP_DEFAULT	1
#else
# define WATCHDOG_HARDLOCKUP_DEFAULT	0
#endif

unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
static int __read_mostly watchdog_softlockup_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly watchdog_hardlockup_available;

struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

#ifdef CONFIG_HARDLOCKUP_DETECTOR

# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
# endif /* CONFIG_SMP */

/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
unsigned int __read_mostly hardlockup_panic =
			IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);
/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void __init hardlockup_detector_disable(void)
{
	watchdog_hardlockup_user_enabled = 0;
}

static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		watchdog_hardlockup_user_enabled = 0;
	else if (!strncmp(str, "1", 1))
		watchdog_hardlockup_user_enabled = 1;
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);

#endif /* CONFIG_HARDLOCKUP_DETECTOR */

#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER)

static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);
static DEFINE_PER_CPU(int, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
static unsigned long watchdog_hardlockup_all_cpu_dumped;

notrace void arch_touch_nmi_watchdog(void)
{
	/*
	 * Using __raw here because some code paths have
	 * preemption enabled.  If preemption is enabled
	 * then interrupts should be enabled too, in which
	 * case we shouldn't have to worry about the watchdog
	 * going off.
	 */
	raw_cpu_write(watchdog_hardlockup_touched, true);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

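/*
 * Example: code that legitimately keeps a CPU busy with interrupts off
 * (e.g. kgdb) calls touch_nmi_watchdog(), which lands in
 * arch_touch_nmi_watchdog() above; the touched flag then makes
 * watchdog_hardlockup_check() skip that CPU's next check once.
 */
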
void watchdog_hardlockup_touch_cpu(unsigned int cpu)
{
	per_cpu(watchdog_hardlockup_touched, cpu) = true;
}

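/*
 * Example of the counting scheme below: with the defaults the watchdog
 * hrtimer fires every 4 seconds, so hrtimer_interrupts should advance two
 * or three times between consecutive hardlockup checks; a count that has
 * not changed since the last check means this CPU serviced no timer
 * interrupts for a whole check window.
 */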
static bool is_hardlockup(unsigned int cpu)
{
	int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));

	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
		return true;

	/*
	 * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE
	 * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is
	 * written/read by a single CPU.
	 */
	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;

	return false;
}

static void watchdog_hardlockup_kick(void)
{
	int new_interrupts;

	new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts));
	watchdog_buddy_check_hardlockup(new_interrupts);
}

void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
{
	if (per_cpu(watchdog_hardlockup_touched, cpu)) {
		per_cpu(watchdog_hardlockup_touched, cpu) = false;
		return;
	}

	/*
	 * Check for a hardlockup by making sure the CPU's timer
	 * interrupt is incrementing. The timer interrupt should have
	 * fired multiple times since the last check. If it hasn't,
	 * that is a good indication the CPU is stuck.
	 */
	if (is_hardlockup(cpu)) {
		unsigned int this_cpu = smp_processor_id();
		struct cpumask backtrace_mask;

		cpumask_copy(&backtrace_mask, cpu_online_mask);

		/* Only print hardlockups once. */
		if (per_cpu(watchdog_hardlockup_warned, cpu))
			return;

		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
		print_modules();
		print_irqtrace_events(current);
		if (cpu == this_cpu) {
			if (regs)
				show_regs(regs);
			else
				dump_stack();
			cpumask_clear_cpu(cpu, &backtrace_mask);
		} else {
			if (trigger_single_cpu_backtrace(cpu))
				cpumask_clear_cpu(cpu, &backtrace_mask);
		}

		/*
		 * Perform the multi-CPU dump only once to avoid multiple
		 * hardlockups generating interleaved traces.
		 */
		if (sysctl_hardlockup_all_cpu_backtrace &&
		    !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
			trigger_cpumask_backtrace(&backtrace_mask);

		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");

		per_cpu(watchdog_hardlockup_warned, cpu) = true;
	} else {
		per_cpu(watchdog_hardlockup_warned, cpu) = false;
	}
}

#else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */

static inline void watchdog_hardlockup_kick(void) { }

#endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */

/*
 * These functions can be overridden based on the configured hardlockup detector.
 *
 * watchdog_hardlockup_enable/disable can be implemented to start and stop
 * when the softlockup watchdog starts and stops. The detector must select
 * the SOFTLOCKUP_DETECTOR Kconfig.
 */
void __weak watchdog_hardlockup_enable(unsigned int cpu) { }

void __weak watchdog_hardlockup_disable(unsigned int cpu) { }

/*
 * Watchdog-detector specific API.
 *
 * Return 0 when the hardlockup watchdog is available, a negative value
 * otherwise. Note that a negative value means that a delayed probe might
 * succeed later.
 */
int __weak __init watchdog_hardlockup_probe(void)
{
	return -ENODEV;
}

/**
 * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration
 *
 * The reconfiguration steps are:
 * watchdog_hardlockup_stop();
 * update_variables();
 * watchdog_hardlockup_start();
 */
void __weak watchdog_hardlockup_stop(void) { }

/**
 * watchdog_hardlockup_start - Start the watchdog after reconfiguration
 *
 * Counterpart to watchdog_hardlockup_stop().
 *
 * The following variables have been updated in update_variables() and
 * contain the currently valid configuration:
 * - watchdog_enabled
 * - watchdog_thresh
 * - watchdog_cpumask
 */
void __weak watchdog_hardlockup_start(void) { }

/**
 * lockup_detector_update_enable - Update the sysctl enable bit
 *
 * Caller needs to make sure that the hard watchdogs are off, so this
 * can't race with watchdog_hardlockup_disable().
 */
static void lockup_detector_update_enable(void)
{
	watchdog_enabled = 0;
	if (!watchdog_user_enabled)
		return;
	if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled)
		watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED;
	if (watchdog_softlockup_user_enabled)
		watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED;
}

#ifdef CONFIG_SOFTLOCKUP_DETECTOR

/*
 * Delay the softlockup report when running known slow code.
 * It does _not_ affect the timestamp of the last successful reschedule.
 */
#define SOFTLOCKUP_DELAY_REPORT	ULONG_MAX

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif

static struct cpumask watchdog_allowed_mask __read_mostly;

/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
			IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);

static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;

/* Timestamp taken after the last successful reschedule. */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
/* Timestamp of the last softlockup report. */
static DEFINE_PER_CPU(unsigned long, watchdog_report_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static unsigned long soft_lockup_nmi_warn;

static int __init nowatchdog_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	watchdog_softlockup_user_enabled = 0;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

static int __init watchdog_thresh_setup(char *str)
{
	get_option(&str, &watchdog_thresh);
	return 1;
}
__setup("watchdog_thresh=", watchdog_thresh_setup);

static void __lockup_detector_cleanup(void);

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions, so we generally
 * want a higher threshold for soft lockups than for hard lockups. Hence we
 * couple the thresholds with a factor: the soft threshold is twice the hard
 * threshold.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}
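
/*
 * Example: the default watchdog_thresh of 10 seconds yields a softlockup
 * threshold of 20 seconds.
 */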

/*
 * Returns seconds, approximately. We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL;  /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
	watchdog_update_hrtimer_threshold(sample_period);
}

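/*
 * Worked example: with watchdog_thresh = 10, get_softlockup_thresh()
 * returns 20, so sample_period = 20 * (1e9 / 5) ns = 4 seconds; the
 * watchdog hrtimer then fires five times per softlockup window.
 */
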
static void update_report_ts(void)
{
	__this_cpu_write(watchdog_report_ts, get_timestamp());
}

/* Commands for resetting the watchdog */
static void update_touch_ts(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
	update_report_ts();
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state.  This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
notrace void touch_softlockup_watchdog_sched(void)
{
	/*
	 * Preemption can be enabled.  It doesn't matter which CPU's watchdog
	 * report period gets restarted here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
}

notrace void touch_softlockup_watchdog(void)
{
	touch_softlockup_watchdog_sched();
	wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * watchdog_mutex cannot be taken here, as this might be called
	 * from (soft)interrupt context, so the access to
	 * watchdog_allowed_cpumask might race with a concurrent update.
	 *
	 * The watchdog time stamp can race against a concurrent real
	 * update as well; the only side effect might be a cycle delay for
	 * the softlockup check.
	 */
	for_each_cpu(cpu, &watchdog_allowed_mask) {
		per_cpu(watchdog_report_ts, cpu) = SOFTLOCKUP_DELAY_REPORT;
		wq_watchdog_touch(cpu);
	}
}

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
}

static int is_softlockup(unsigned long touch_ts,
			 unsigned long period_ts,
			 unsigned long now)
{
	if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
		/* Warn about unreasonable delays. */
		if (time_after(now, period_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}
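
/*
 * Note: the returned duration is measured from the last successful
 * reschedule (touch_ts), not from the last report, so the "stuck for
 * %us" message in watchdog_timer_fn() reflects the total hog time.
 */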

/* watchdog detector functions */
static DEFINE_PER_CPU(struct completion, softlockup_completion);
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);

/*
 * The watchdog feed function - touches the timestamp.
 *
 * It only runs once every sample_period seconds (4 seconds by
 * default) to reset the softlockup timestamp. If this gets delayed
 * for more than 2*watchdog_thresh seconds then the debug-printout
 * triggers in watchdog_timer_fn().
 */
static int softlockup_fn(void *data)
{
	update_touch_ts();
	complete(this_cpu_ptr(&softlockup_completion));

	return 0;
}

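/*
 * Note on the flow below: each hrtimer tick kicks the hardlockup counter,
 * queues softlockup_fn() on the per-CPU stopper task (its completion
 * proves the CPU can still schedule), and then compares the report
 * timestamp against the current time to decide whether to warn.
 */
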
/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts, period_ts, now;
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

	if (!watchdog_enabled)
		return HRTIMER_NORESTART;

	watchdog_hardlockup_kick();

	/* kick the softlockup detector */
	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
		reinit_completion(this_cpu_ptr(&softlockup_completion));
		stop_one_cpu_nowait(smp_processor_id(),
				softlockup_fn, NULL,
				this_cpu_ptr(&softlockup_stop_work));
	}

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	/*
	 * Read the current timestamp first. It might become invalid anytime
	 * when a virtual machine is stopped by the host or when the watchdog
	 * is touched from NMI.
	 */
	now = get_timestamp();
	/*
	 * If a virtual machine is stopped by the host it can look to
	 * the watchdog like a soft lockup. This function touches the watchdog.
	 */
	kvm_check_and_clear_guest_paused();
	/*
	 * The stored timestamp is comparable with @now only when not touched.
	 * It might get touched anytime from NMI. Make sure that is_softlockup()
	 * uses the same (valid) value.
	 */
	period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));

	/* Reset the interval when touched by known problematic code. */
	if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		update_report_ts();
		return HRTIMER_RESTART;
	}

	/* Check for a softlockup. */
	touch_ts = __this_cpu_read(watchdog_touch_ts);
	duration = is_softlockup(touch_ts, period_ts, now);
	if (unlikely(duration)) {
		/*
		 * Prevent multiple soft-lockup reports if one cpu is already
		 * engaged in dumping all cpu back traces.
		 */
		if (softlockup_all_cpu_backtrace) {
			if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn))
				return HRTIMER_RESTART;
		}

		/* Start period for the next softlockup warning. */
		update_report_ts();

		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			smp_processor_id(), duration,
			current->comm, task_pid_nr(current));
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_all_cpu_backtrace) {
			trigger_allbutself_cpu_backtrace();
			clear_bit_unlock(0, &soft_lockup_nmi_warn);
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
	}

	return HRTIMER_RESTART;
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
	struct completion *done = this_cpu_ptr(&softlockup_completion);

	WARN_ON_ONCE(cpu != smp_processor_id());

	init_completion(done);
	complete(done);

	/*
	 * Start the timer first to prevent the hardlockup watchdog triggering
	 * before the timer has a chance to fire.
	 */
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED_HARD);

	/* Initialize timestamp */
	update_touch_ts();
	/* Enable the hardlockup detector */
	if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)
		watchdog_hardlockup_enable(cpu);
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

	WARN_ON_ONCE(cpu != smp_processor_id());

	/*
	 * Disable the hardlockup detector first. This prevents a false
	 * positive caused by a large delay between disabling the timer
	 * and disabling the hardlockup detector.
	 */
	watchdog_hardlockup_disable(cpu);
	hrtimer_cancel(hrtimer);
	wait_for_completion(this_cpu_ptr(&softlockup_completion));
}

static int softlockup_stop_fn(void *data)
{
	watchdog_disable(smp_processor_id());
	return 0;
}

static void softlockup_stop_all(void)
{
	int cpu;

	if (!softlockup_initialized)
		return;

	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);

	cpumask_clear(&watchdog_allowed_mask);
}

static int softlockup_start_fn(void *data)
{
	watchdog_enable(smp_processor_id());
	return 0;
}

static void softlockup_start_all(void)
{
	int cpu;

	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
}

int lockup_detector_online_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_enable(cpu);
	return 0;
}

int lockup_detector_offline_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_disable(cpu);
	return 0;
}

static void __lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();

	softlockup_stop_all();
	set_sample_period();
	lockup_detector_update_enable();
	if (watchdog_enabled && watchdog_thresh)
		softlockup_start_all();

	watchdog_hardlockup_start();
	cpus_read_unlock();
	/*
	 * Must be called outside the cpus locked section to prevent
	 * recursive locking in the perf code.
	 */
	__lockup_detector_cleanup();
}

void lockup_detector_reconfigure(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure();
	mutex_unlock(&watchdog_mutex);
}

/*
 * Create the watchdog infrastructure and configure the detector(s).
 */
static __init void lockup_detector_setup(void)
{
	/*
	 * If sysctl is off and watchdog got disabled on the command line,
	 * nothing to do here.
	 */
	lockup_detector_update_enable();

	if (!IS_ENABLED(CONFIG_SYSCTL) &&
	    !(watchdog_enabled && watchdog_thresh))
		return;

	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure();
	softlockup_initialized = true;
	mutex_unlock(&watchdog_mutex);
}

#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static void __lockup_detector_reconfigure(void)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();
	lockup_detector_update_enable();
	watchdog_hardlockup_start();
	cpus_read_unlock();
}
void lockup_detector_reconfigure(void)
{
	__lockup_detector_reconfigure();
}
static inline void lockup_detector_setup(void)
{
	__lockup_detector_reconfigure();
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

static void __lockup_detector_cleanup(void)
{
	lockdep_assert_held(&watchdog_mutex);
	hardlockup_detector_perf_cleanup();
}

/**
 * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
 *
 * Caller must not hold the cpu hotplug rwsem.
 */
void lockup_detector_cleanup(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_cleanup();
	mutex_unlock(&watchdog_mutex);
}

/**
 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
 *
 * Special interface for parisc. It prevents lockup detector warnings from
 * the default pm_poweroff() function which busy loops forever.
 */
void lockup_detector_soft_poweroff(void)
{
	watchdog_enabled = 0;
}

#ifdef CONFIG_SYSCTL

/* Propagate any changes to the watchdog infrastructure */
static void proc_watchdog_update(void)
{
	/* Remove impossible cpus to keep sysctl output clean. */
	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
	__lockup_detector_reconfigure();
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to            | 'which'
 * -------------------|----------------------------------|-------------------------------
 * proc_watchdog      | watchdog_user_enabled            | WATCHDOG_HARDLOCKUP_ENABLED |
 *                    |                                  | WATCHDOG_SOFTOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_nmi_watchdog  | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED
 */
static int proc_watchdog_common(int which, struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, *param = table->data;

	mutex_lock(&watchdog_mutex);

	if (!write) {
		/*
		 * On read synchronize the userspace interface. This is a
		 * racy snapshot.
		 */
		*param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	} else {
		old = READ_ONCE(*param);
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (!err && old != READ_ONCE(*param))
			proc_watchdog_update();
	}
	mutex_unlock(&watchdog_mutex);
	return err;
}

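/*
 * Example: "echo 0 > /proc/sys/kernel/soft_watchdog" clears
 * WATCHDOG_SOFTOCKUP_ENABLED and reconfigures the detectors; reading any
 * of these files reports the corresponding bit(s) of watchdog_enabled.
 */
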
/*
 * /proc/sys/kernel/watchdog
 */
int proc_watchdog(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
				    WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
int proc_nmi_watchdog(struct ctl_table *table, int write,
		      void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!watchdog_hardlockup_available && write)
		return -ENOTSUPP;
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/soft_watchdog
 */
int proc_soft_watchdog(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/watchdog_thresh
 */
int proc_watchdog_thresh(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old;

	mutex_lock(&watchdog_mutex);

	old = READ_ONCE(watchdog_thresh);
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (!err && write && old != READ_ONCE(watchdog_thresh))
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on.  This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
int proc_watchdog_cpumask(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	mutex_lock(&watchdog_mutex);

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write)
		proc_watchdog_update();

	mutex_unlock(&watchdog_mutex);
	return err;
}

static const int sixty = 60;

static struct ctl_table watchdog_sysctls[] = {
	{
		.procname	= "watchdog",
		.data		= &watchdog_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "watchdog_thresh",
		.data		= &watchdog_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_watchdog_thresh,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&sixty,
	},
	{
		.procname	= "watchdog_cpumask",
		.data		= &watchdog_cpumask_bits,
		.maxlen		= NR_CPUS,
		.mode		= 0644,
		.proc_handler	= proc_watchdog_cpumask,
	},
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
	{
		.procname	= "soft_watchdog",
		.data		= &watchdog_softlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_soft_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "softlockup_panic",
		.data		= &softlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "softlockup_all_cpu_backtrace",
		.data		= &sysctl_softlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR
	{
		.procname	= "hardlockup_panic",
		.data		= &hardlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "hardlockup_all_cpu_backtrace",
		.data		= &sysctl_hardlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
	{}
};

static struct ctl_table watchdog_hardlockup_sysctl[] = {
	{
		.procname	= "nmi_watchdog",
		.data		= &watchdog_hardlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nmi_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{}
};

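/*
 * Note: "nmi_watchdog" is registered read-only (0444) and only flipped
 * to 0644 below once a hardlockup detector has successfully probed, so
 * userspace cannot enable a detector that is not available.
 */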
static void __init watchdog_sysctl_init(void)
{
	register_sysctl_init("kernel", watchdog_sysctls);

	if (watchdog_hardlockup_available)
		watchdog_hardlockup_sysctl[0].mode = 0644;
	register_sysctl_init("kernel", watchdog_hardlockup_sysctl);
}

#else
#define watchdog_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

static void __init lockup_detector_delay_init(struct work_struct *work);
static bool allow_lockup_detector_init_retry __initdata;

static struct work_struct detector_work __initdata =
		__WORK_INITIALIZER(detector_work, lockup_detector_delay_init);

static void __init lockup_detector_delay_init(struct work_struct *work)
{
	int ret;

	ret = watchdog_hardlockup_probe();
	if (ret) {
		pr_info("Delayed init of the lockup detector failed: %d\n", ret);
		pr_info("Hard watchdog permanently disabled\n");
		return;
	}

	allow_lockup_detector_init_retry = false;

	watchdog_hardlockup_available = true;
	lockup_detector_setup();
}

/*
 * lockup_detector_retry_init - retry init of the lockup detector if possible.
 *
 * Retry hardlockup detector init. It is useful when the detector requires
 * functionality that only becomes available later during boot on a
 * particular platform.
 */
void __init lockup_detector_retry_init(void)
{
	/* Must be called before late init calls */
	if (!allow_lockup_detector_init_retry)
		return;

	schedule_work(&detector_work);
}

/*
 * Ensure that the optional delayed hardlockup init has completed before
 * the init code and memory are freed.
 */
static int __init lockup_detector_check(void)
{
	/* Prevent any later retry. */
	allow_lockup_detector_init_retry = false;

	/* Make sure no work is pending. */
	flush_work(&detector_work);

	watchdog_sysctl_init();

	return 0;
}
late_initcall_sync(lockup_detector_check);

void __init lockup_detector_init(void)
{
	if (tick_nohz_full_enabled())
		pr_info("Disabling watchdog on nohz_full cores by default\n");

	cpumask_copy(&watchdog_cpumask,
		     housekeeping_cpumask(HK_TYPE_TIMER));

	if (!watchdog_hardlockup_probe())
		watchdog_hardlockup_available = true;
	else
		allow_lockup_detector_init_retry = true;

	lockup_detector_setup();
}