panic, x86: Allow CPUs to save registers even if looping in NMI context

author Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>

Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)

committer Thomas Gleixner <tglx@linutronix.de>

Sat, 19 Dec 2015 10:07:01 +0000 (11:07 +0100)
author Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)
committer Thomas Gleixner <tglx@linutronix.de>
Sat, 19 Dec 2015 10:07:01 +0000 (11:07 +0100)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index fca87938d739a15e1828e53e6cc706902bad4662..424aec4a4c712aa211def505f0df7b448c4bb0ed 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
  #endif
  
         if (panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
  
         pr_emerg("Dazed and confused, but trying to continue\n");
  
@@ -256,7 +256,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
         show_regs(regs);
  
         if (panic_on_io_nmi) {
-               nmi_panic("NMI IOCK error: Not continuing");
+               nmi_panic(regs, "NMI IOCK error: Not continuing");
  
                 /*
                  * If we end up here, it means we have received an NMI while
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
  
         pr_emerg("Do you have a strange power saving mode enabled?\n");
         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
  
         pr_emerg("Dazed and confused, but trying to continue\n");
  }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c

index 02693dd9a0790b804a515294714d59ed68688ba8..1da13022d544475a67ee6a69565ce7e2d63a82c4 100644 (file)
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -718,6 +718,7 @@ static int crashing_cpu;
  static nmi_shootdown_cb shootdown_callback;
  
  static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
  
  static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
  {
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  
         smp_send_nmi_allbutself();
  
+       /* Kick CPUs looping in NMI context. */
+       WRITE_ONCE(crash_ipi_issued, 1);
+
         msecs = 1000; /* Wait at most a second for the other cpus to stop */
         while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
                 mdelay(1);
@@ -788,6 +792,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  
         /* Leave the nmi callback set */
  }
+
+/* Override the weak nmi_panic_self_stop() defined in kernel/panic.c. */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+       while (1) {
+               /*
+                * Spin until nmi_shootdown_cpus() sets crash_ipi_issued, and
+                * then call crash_nmi_callback() directly with our own regs.
+                */
+               if (READ_ONCE(crash_ipi_issued))
+                       crash_nmi_callback(0, regs); /* Don't return */
+
+               cpu_relax();
+       }
+}
+
  #else /* !CONFIG_SMP */
  void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  {
diff --git a/include/linux/kernel.h b/include/linux/kernel.h

index 750cc5c7c99980e6ea32fc61967ee9f3990f37c2..7311c3294e25f22a610209a63115c3d0778ecd0b 100644 (file)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -255,6 +255,7 @@ extern long (*panic_blink)(int state);
  __printf(1, 2)
  void panic(const char *fmt, ...)
         __noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
  extern void oops_enter(void);
  extern void oops_exit(void);
  void print_oops_end_marker(void);
@@ -455,14 +456,21 @@ extern atomic_t panic_cpu;
  
  /*
   * A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU.
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
   */
-#define nmi_panic(fmt, ...)                                            \
+#define nmi_panic(regs, fmt, ...)                                      \
  do {                                                                   \
-       int cpu = raw_smp_processor_id();                               \
+       int old_cpu, cpu;                                               \
                                                                         \
-       if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu)  \
+       cpu = raw_smp_processor_id();                                   \
+       old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);   \
+                                                                       \
+       if (old_cpu == PANIC_CPU_INVALID)                               \
                 panic(fmt, ##__VA_ARGS__);                              \
+       else if (old_cpu != cpu)                                        \
+               nmi_panic_self_stop(regs);                              \
  } while (0)
  
  /*
diff --git a/kernel/panic.c b/kernel/panic.c

index 3344524cf6ffd658c901b383af317aa095e4cff0..06f31b49b3b4775c2d55f3a02435fb858764ce53 100644 (file)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -61,6 +61,15 @@ void __weak panic_smp_self_stop(void)
                 cpu_relax();
  }
  
+/*
+ * Stop in NMI context if another CPU has already panicked. This weak default
+ * ignores regs; arch code may override it to save regs info for crash dumping.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+       panic_smp_self_stop();
+}
+
  atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
  
  /**
diff --git a/kernel/watchdog.c b/kernel/watchdog.c

index b9be18fae15455f7483c0fba4f9ada15f232ff7d..84b5035cb6a57099362ec9ab6056d6c936f5b706 100644 (file)
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -351,7 +351,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
                         trigger_allbutself_cpu_backtrace();
  
                 if (hardlockup_panic)
-                       nmi_panic("Hard LOCKUP");
+                       nmi_panic(regs, "Hard LOCKUP");
  
                 __this_cpu_write(hard_watchdog_warn, true);
                 return;
author	Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
	Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)
committer	Thomas Gleixner <tglx@linutronix.de>
	Sat, 19 Dec 2015 10:07:01 +0000 (11:07 +0100)
arch/x86/kernel/nmi.c		patch | blob | blame | history
arch/x86/kernel/reboot.c		patch | blob | blame | history
include/linux/kernel.h		patch | blob | blame | history
kernel/panic.c		patch | blob | blame | history
kernel/watchdog.c		patch | blob | blame | history