Commit | Line | Data |
---|---|---|
1f423c90 DA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/cpu.h> | |
4 | #include <linux/cpumask.h> | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/nmi.h> | |
7 | #include <linux/percpu-defs.h> | |
8 | ||
9 | static cpumask_t __read_mostly watchdog_cpus; | |
10 | ||
11 | static unsigned int watchdog_next_cpu(unsigned int cpu) | |
12 | { | |
1f423c90 DA |
13 | unsigned int next_cpu; |
14 | ||
813efda2 | 15 | next_cpu = cpumask_next(cpu, &watchdog_cpus); |
1f423c90 | 16 | if (next_cpu >= nr_cpu_ids) |
813efda2 | 17 | next_cpu = cpumask_first(&watchdog_cpus); |
1f423c90 DA |
18 | |
19 | if (next_cpu == cpu) | |
20 | return nr_cpu_ids; | |
21 | ||
22 | return next_cpu; | |
23 | } | |
24 | ||
25 | int __init watchdog_hardlockup_probe(void) | |
26 | { | |
27 | return 0; | |
28 | } | |
29 | ||
30 | void watchdog_hardlockup_enable(unsigned int cpu) | |
31 | { | |
32 | unsigned int next_cpu; | |
33 | ||
34 | /* | |
35 | * The new CPU will be marked online before the hrtimer interrupt | |
36 | * gets a chance to run on it. If another CPU tests for a | |
37 | * hardlockup on the new CPU before it has run its the hrtimer | |
38 | * interrupt, it will get a false positive. Touch the watchdog on | |
39 | * the new CPU to delay the check for at least 3 sampling periods | |
40 | * to guarantee one hrtimer has run on the new CPU. | |
41 | */ | |
42 | watchdog_hardlockup_touch_cpu(cpu); | |
43 | ||
44 | /* | |
45 | * We are going to check the next CPU. Our watchdog_hrtimer | |
46 | * need not be zero if the CPU has already been online earlier. | |
47 | * Touch the watchdog on the next CPU to avoid false positive | |
48 | * if we try to check it in less then 3 interrupts. | |
49 | */ | |
50 | next_cpu = watchdog_next_cpu(cpu); | |
51 | if (next_cpu < nr_cpu_ids) | |
52 | watchdog_hardlockup_touch_cpu(next_cpu); | |
53 | ||
28168eca DA |
54 | /* |
55 | * Makes sure that watchdog is touched on this CPU before | |
56 | * other CPUs could see it in watchdog_cpus. The counter | |
57 | * part is in watchdog_buddy_check_hardlockup(). | |
58 | */ | |
59 | smp_wmb(); | |
60 | ||
1f423c90 DA |
61 | cpumask_set_cpu(cpu, &watchdog_cpus); |
62 | } | |
63 | ||
64 | void watchdog_hardlockup_disable(unsigned int cpu) | |
65 | { | |
66 | unsigned int next_cpu = watchdog_next_cpu(cpu); | |
67 | ||
68 | /* | |
69 | * Offlining this CPU will cause the CPU before this one to start | |
70 | * checking the one after this one. If this CPU just finished checking | |
71 | * the next CPU and updating hrtimer_interrupts_saved, and then the | |
72 | * previous CPU checks it within one sample period, it will trigger a | |
73 | * false positive. Touch the watchdog on the next CPU to prevent it. | |
74 | */ | |
75 | if (next_cpu < nr_cpu_ids) | |
76 | watchdog_hardlockup_touch_cpu(next_cpu); | |
77 | ||
28168eca DA |
78 | /* |
79 | * Makes sure that watchdog is touched on the next CPU before | |
80 | * this CPU disappear in watchdog_cpus. The counter part is in | |
81 | * watchdog_buddy_check_hardlockup(). | |
82 | */ | |
83 | smp_wmb(); | |
84 | ||
1f423c90 DA |
85 | cpumask_clear_cpu(cpu, &watchdog_cpus); |
86 | } | |
87 | ||
d3b62ace | 88 | void watchdog_buddy_check_hardlockup(int hrtimer_interrupts) |
1f423c90 DA |
89 | { |
90 | unsigned int next_cpu; | |
91 | ||
92 | /* | |
93 | * Test for hardlockups every 3 samples. The sample period is | |
94 | * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over | |
95 | * watchdog_thresh (over by 20%). | |
96 | */ | |
97 | if (hrtimer_interrupts % 3 != 0) | |
98 | return; | |
99 | ||
100 | /* check for a hardlockup on the next CPU */ | |
101 | next_cpu = watchdog_next_cpu(smp_processor_id()); | |
102 | if (next_cpu >= nr_cpu_ids) | |
103 | return; | |
104 | ||
28168eca DA |
105 | /* |
106 | * Make sure that the watchdog was touched on next CPU when | |
107 | * watchdog_next_cpu() returned another one because of | |
108 | * a change in watchdog_hardlockup_enable()/disable(). | |
109 | */ | |
110 | smp_rmb(); | |
111 | ||
1f423c90 DA |
112 | watchdog_hardlockup_check(next_cpu, NULL); |
113 | } |