Commit | Line | Data |
---|---|---|
1f423c90 DA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/cpu.h> | |
4 | #include <linux/cpumask.h> | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/nmi.h> | |
7 | #include <linux/percpu-defs.h> | |
8 | ||
9 | static cpumask_t __read_mostly watchdog_cpus; | |
10 | ||
11 | static unsigned int watchdog_next_cpu(unsigned int cpu) | |
12 | { | |
13 | cpumask_t cpus = watchdog_cpus; | |
14 | unsigned int next_cpu; | |
15 | ||
16 | next_cpu = cpumask_next(cpu, &cpus); | |
17 | if (next_cpu >= nr_cpu_ids) | |
18 | next_cpu = cpumask_first(&cpus); | |
19 | ||
20 | if (next_cpu == cpu) | |
21 | return nr_cpu_ids; | |
22 | ||
23 | return next_cpu; | |
24 | } | |
25 | ||
26 | int __init watchdog_hardlockup_probe(void) | |
27 | { | |
28 | return 0; | |
29 | } | |
30 | ||
31 | void watchdog_hardlockup_enable(unsigned int cpu) | |
32 | { | |
33 | unsigned int next_cpu; | |
34 | ||
35 | /* | |
36 | * The new CPU will be marked online before the hrtimer interrupt | |
37 | * gets a chance to run on it. If another CPU tests for a | |
38 | * hardlockup on the new CPU before it has run its the hrtimer | |
39 | * interrupt, it will get a false positive. Touch the watchdog on | |
40 | * the new CPU to delay the check for at least 3 sampling periods | |
41 | * to guarantee one hrtimer has run on the new CPU. | |
42 | */ | |
43 | watchdog_hardlockup_touch_cpu(cpu); | |
44 | ||
45 | /* | |
46 | * We are going to check the next CPU. Our watchdog_hrtimer | |
47 | * need not be zero if the CPU has already been online earlier. | |
48 | * Touch the watchdog on the next CPU to avoid false positive | |
49 | * if we try to check it in less then 3 interrupts. | |
50 | */ | |
51 | next_cpu = watchdog_next_cpu(cpu); | |
52 | if (next_cpu < nr_cpu_ids) | |
53 | watchdog_hardlockup_touch_cpu(next_cpu); | |
54 | ||
55 | cpumask_set_cpu(cpu, &watchdog_cpus); | |
56 | } | |
57 | ||
58 | void watchdog_hardlockup_disable(unsigned int cpu) | |
59 | { | |
60 | unsigned int next_cpu = watchdog_next_cpu(cpu); | |
61 | ||
62 | /* | |
63 | * Offlining this CPU will cause the CPU before this one to start | |
64 | * checking the one after this one. If this CPU just finished checking | |
65 | * the next CPU and updating hrtimer_interrupts_saved, and then the | |
66 | * previous CPU checks it within one sample period, it will trigger a | |
67 | * false positive. Touch the watchdog on the next CPU to prevent it. | |
68 | */ | |
69 | if (next_cpu < nr_cpu_ids) | |
70 | watchdog_hardlockup_touch_cpu(next_cpu); | |
71 | ||
72 | cpumask_clear_cpu(cpu, &watchdog_cpus); | |
73 | } | |
74 | ||
75 | void watchdog_buddy_check_hardlockup(unsigned long hrtimer_interrupts) | |
76 | { | |
77 | unsigned int next_cpu; | |
78 | ||
79 | /* | |
80 | * Test for hardlockups every 3 samples. The sample period is | |
81 | * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over | |
82 | * watchdog_thresh (over by 20%). | |
83 | */ | |
84 | if (hrtimer_interrupts % 3 != 0) | |
85 | return; | |
86 | ||
87 | /* check for a hardlockup on the next CPU */ | |
88 | next_cpu = watchdog_next_cpu(smp_processor_id()); | |
89 | if (next_cpu >= nr_cpu_ids) | |
90 | return; | |
91 | ||
92 | watchdog_hardlockup_check(next_cpu, NULL); | |
93 | } |