// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/sched/clock.h>

#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
#include <asm/microcode.h>
#include <asm/sev.h>
#include <asm/fred.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>

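/*
 * One handler list per NMI type (NMI_LOCAL, NMI_UNKNOWN, NMI_SERR and
 * NMI_IO_CHECK, see <asm/nmi.h>), each protected by its own spinlock.
 */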
struct nmi_desc {
	raw_spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},
};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
	unsigned long recv_jiffies;
	unsigned long idt_seq;
	unsigned long idt_nmi_seq;
	unsigned long idt_ignored;
	atomic_long_t idt_calls;
	unsigned long idt_seq_snap;
	unsigned long idt_nmi_seq_snap;
	unsigned long idt_ignored_snap;
	long idt_calls_snap;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis __read_mostly;

int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * may only be used from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

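/* Expose the warning threshold as /sys/kernel/debug/x86/nmi_longest_ns. */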
static int __init nmi_warning_debugfs(void)
{
	debugfs_create_u64("nmi_longest_ns", 0644,
			arch_debugfs_dir, &nmi_longest_ns);
	return 0;
}
fs_initcall(nmi_warning_debugfs);

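/*
 * Complain (ratelimited) about a handler that exceeded both the global
 * nmi_longest_ns threshold and its own previous maximum.
 */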
static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
	int remainder_ns, decimal_msecs;

	if (duration < nmi_longest_ns || duration < action->max_duration)
		return;

	action->max_duration = duration;

	remainder_ns = do_div(duration, (1000 * 1000));
	decimal_msecs = remainder_ns / 1000;

	printk_ratelimited(KERN_INFO
		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
		action->handler, duration, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means that if enough of them
	 * arrive concurrently, some can be lost because only one can
	 * be latched at any given time. Walk the whole list to handle
	 * those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list) {
		int thishandled;
		u64 delta;

		delta = sched_clock();
		thishandled = a->handler(type, regs);
		handled += thishandled;
		delta = sched_clock() - delta;
		trace_nmi_handler(a->handler, (int)delta, thishandled);

		nmi_check_duration(a, delta);
	}

	rcu_read_unlock();

	/* Return the total number of NMI events handled. */
	return handled;
}
NOKPROBE_SYMBOL(nmi_handle);

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list)))
		return -EINVAL;

	raw_spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Indicate if there are multiple registrations on the
	 * internal NMI handler call chains (SERR and IO_CHECK).
	 */
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event confuses some handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);
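
/*
 * Handlers are normally registered via the register_nmi_handler() wrapper
 * macro from <asm/nmi.h>, which allocates the struct nmiaction. A minimal
 * sketch (my_nmi and its "my_nmi" name string are made-up examples):
 *
 *	static int my_nmi(unsigned int type, struct pt_regs *regs)
 *	{
 *		return NMI_HANDLED;	// NMI_DONE if the event isn't ours
 *	}
 *
 *	register_nmi_handler(NMI_LOCAL, my_nmi, 0, "my_nmi");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "my_nmi");
 */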

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n, *found = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in to describe the NMI handler
		 * is used as the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			found = n;
			break;
		}
	}

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	if (found) {
		synchronize_rcu();
		INIT_LIST_HEAD(&found->list);
	}
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);

static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* Check to see if anyone registered against these types of errors. */
	if (nmi_handle(NMI_SERR, regs))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	if (panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(pci_serr_error);

static void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* Check to see if anyone registered against these types of errors. */
	if (nmi_handle(NMI_IO_CHECK, regs))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_regs(regs);

	if (panic_on_io_nmi) {
		nmi_panic(regs, "NMI IOCK error: Not continuing");

		/*
		 * If we end up here, it means we have received an NMI while
		 * processing panic(). Simply return without delaying and
		 * re-enabling NMIs.
		 */
		return;
	}

	/* Re-enable the IOCK line and wait about two seconds. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(io_check_error);

static void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Back-to-back NMIs are dealt with one level up. Of course this
	 * makes having multiple 'unknown' handlers useless, as only the
	 * first one is ever run (unless it can actually determine
	 * if it caused the NMI).
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

	pr_emerg_ratelimited("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg_ratelimited("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static noinstr void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMIs must be processed before non-CPU-specific
	 * NMIs, otherwise we may lose them, because CPU-specific
	 * NMIs can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two (anything over two is dropped
	 * due to NMIs being edge-triggered). If this is the second half
	 * of a back-to-back NMI, assume we dropped things and process
	 * more handlers. Otherwise, reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	instrumentation_begin();

	if (microcode_nmi_handler_enabled() && microcode_nmi_handler())
		goto out;

	handled = nmi_handle(NMI_LOCAL, regs);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI. One of these events may
		 * be queued to fire in the next NMI. Because the event
		 * is already handled, the next NMI will result in an
		 * unknown NMI. Instead, let's flag this for a potential
		 * NMI to swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		goto out;
	}

	/*
	 * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
	 *
	 * Another CPU may be processing panic routines while holding
	 * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping,
	 * and if so, call its callback directly. If there is no CPU preparing
	 * a crash dump, we simply loop here.
	 */
	while (!raw_spin_trylock(&nmi_reason_lock)) {
		run_crash_ipi_callback(regs);
		cpu_relax();
	}

	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		goto out;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time. To handle
	 * this we may process multiple NMI handlers at once to
	 * cover the case where an NMI is dropped. The downside
	 * to this approach is that we may process an NMI prematurely,
	 * while its real NMI is sitting latched. This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it. Otherwise, we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI. For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI. These two NMIs get combined into
	 * one (as described above). When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * as well. As a result, it gets swallowed. Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI. But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);

out:
	instrumentation_end();
}

/*
 * NMIs can page fault or hit breakpoints, which will cause them to lose
 * their NMI context with the CPU when the breakpoint or page fault does an
 * IRET.
 *
 * As a result, NMIs can nest if NMIs get unmasked due to an IRET during
 * NMI processing. On x86_64, the asm glue protects us from nested NMIs
 * if the outer NMI came from kernel mode, but we can still nest if the
 * outer NMI came from user mode.
 *
 * To handle these nested NMIs, we have three states:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored. Only one NMI is restarted.)
 *
 * If an NMI executes an iret, another NMI can preempt it. We do not
 * want to allow this new NMI to run, but we want to execute it when the
 * first one finishes. We set the state to "latched", and the exit of
 * the first NMI will perform a dec_return, if the result is zero
 * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
 * dec_return would have set the state to NMI_EXECUTING (what we want it
 * to be when we are running). In this case, we simply jump back to
 * rerun the NMI handler again, and restart the 'latched' NMI.
 *
 * No trap (breakpoint or page fault) should be hit before nmi_restart,
 * thus there is no race between the first check of state for NOT_RUNNING
 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
 * at this point.
 *
 * In case the NMI takes a page fault, we need to save off the CR2
 * because the NMI could have preempted another page fault and corrupted
 * the CR2 that is about to be read. As nested NMIs must be restarted
 * and they can not take breakpoints or page faults, the update of the
 * CR2 must be done before converting the nmi state back to NOT_RUNNING.
 * Otherwise, there would be a race of another nested NMI coming in
 * after setting state to NOT_RUNNING but before updating the nmi_cr2.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
static DEFINE_PER_CPU(unsigned long, nmi_dr7);

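/*
 * The IDT-based NMI entry point. On FRED-enabled systems the
 * DEFINE_FREDENTRY_NMI() variant further below is used instead.
 */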
DEFINE_IDTENTRY_RAW(exc_nmi)
{
	irqentry_state_t irq_state;
	struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);

	/*
	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
	 * cause nested NMIs, but those can be handled safely.
	 */
	sev_es_nmi_complete();
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
		raw_atomic_long_inc(&nsp->idt_calls);

	if (arch_cpu_is_offline(smp_processor_id())) {
		if (microcode_nmi_handler_enabled())
			microcode_offline_nmi_handler();
		return;
	}

	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());

nmi_restart:
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
		WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
		WRITE_ONCE(nsp->recv_jiffies, jiffies);
	}

	/*
	 * Needs to happen before DR7 is accessed, because the hypervisor can
	 * intercept DR7 reads/writes, turning those into #VC exceptions.
	 */
	sev_es_ist_enter(regs);

	this_cpu_write(nmi_dr7, local_db_save());

	irq_state = irqentry_nmi_enter(regs);

	inc_irq_stat(__nmi_count);

	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
	} else if (!ignore_nmis) {
		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
			WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
		}
		default_do_nmi(regs);
		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
			WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
			WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
		}
	}

	irqentry_nmi_exit(regs, irq_state);

	local_db_restore(this_cpu_read(nmi_dr7));

	sev_es_ist_exit();

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
		WARN_ON_ONCE(nsp->idt_seq & 0x1);
		WRITE_ONCE(nsp->recv_jiffies, jiffies);
	}
	if (this_cpu_dec_return(nmi_state))
		goto nmi_restart;
}

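/*
 * KVM/VMX uses this dedicated entry point to handle NMIs that arrive
 * while a guest is running; it simply forwards to exc_nmi().
 */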
#if IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
	exc_nmi(regs);
}
#if IS_MODULE(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
#endif
#endif

#ifdef CONFIG_NMI_CHECK_CPU

static char *nmi_check_stall_msg[] = {
/*									*/
/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler.		*/
/* | +------ cpu_is_offline(cpu)					*/
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
/* | | |	NMI handler has been invoked.				*/
/* | | |								*/
/* V V V								*/
/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
};

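/*
 * Snapshot the relevant per-CPU NMI counters for the CPUs in @btp so that
 * nmi_backtrace_stall_check() can later tell how far each CPU progressed.
 */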
void nmi_backtrace_stall_snap(const struct cpumask *btp)
{
	int cpu;
	struct nmi_stats *nsp;

	for_each_cpu(cpu, btp) {
		nsp = per_cpu_ptr(&nmi_stats, cpu);
		nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
		nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
		nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
		nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
	}
}

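/*
 * Report, for each CPU in @btp, whether and where its NMI processing
 * stalled since the last nmi_backtrace_stall_snap() snapshot.
 */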
void nmi_backtrace_stall_check(const struct cpumask *btp)
{
	int cpu;
	int idx;
	unsigned long nmi_seq;
	unsigned long j = jiffies;
	char *modp;
	char *msgp;
	char *msghp;
	struct nmi_stats *nsp;

	for_each_cpu(cpu, btp) {
		nsp = per_cpu_ptr(&nmi_stats, cpu);
		modp = "";
		msghp = "";
		nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
		if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
			msgp = "CPU entered NMI handler function, but has not exited";
		} else if (nsp->idt_nmi_seq_snap == nmi_seq ||
			   nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
			idx = ((nmi_seq & 0x1) << 2) |
			      (cpu_is_offline(cpu) << 1) |
			      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
			msgp = nmi_check_stall_msg[idx];
			if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
				modp = ", but OK because ignore_nmis was set";
			if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
				msghp = " (CPU exited one NMI handler function)";
			else if (nmi_seq & 0x1)
				msghp = " (CPU currently in NMI handler function)";
			else
				msghp = " (CPU was never in an NMI handler function)";
		} else {
			msgp = "CPU is handling NMIs";
		}
		pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
		pr_alert("%s: last activity: %lu jiffies ago.\n",
			 __func__, j - READ_ONCE(nsp->recv_jiffies));
	}
}

#endif

#ifdef CONFIG_X86_FRED
/*
 * With FRED, CR2/DR6 are pushed to the #PF/#DB stack frame during FRED
 * event delivery, i.e., there is no problem with transient states.
 * And NMI unblocking only happens when the stack frame indicates
 * that it should happen.
 *
 * Thus, the NMI entry stub for FRED is really straightforward and
 * as simple as most exception handlers. As such, #DB is allowed
 * during NMI handling.
 */
DEFINE_FREDENTRY_NMI(exc_nmi)
{
	irqentry_state_t irq_state;

	if (arch_cpu_is_offline(smp_processor_id())) {
		if (microcode_nmi_handler_enabled())
			microcode_offline_nmi_handler();
		return;
	}

	/*
	 * Save CR2 for eventual restore to cover the case where the NMI
	 * hits the VMENTER/VMEXIT region where guest CR2 is live. This
	 * prevents guest state corruption in case the NMI handler
	 * takes a page fault.
	 */
	this_cpu_write(nmi_cr2, read_cr2());

	irq_state = irqentry_nmi_enter(regs);

	inc_irq_stat(__nmi_count);
	default_do_nmi(regs);

	irqentry_nmi_exit(regs, irq_state);

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
}
#endif

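/*
 * stop_nmi()/restart_nmi() let a caller temporarily suppress NMI
 * processing in exc_nmi() by bumping the ignore_nmis count.
 */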
void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* Reset the back-to-back NMI logic. */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
EXPORT_SYMBOL_GPL(local_touch_nmi);