panic, x86: Fix re-entrance problem due to panic on NMI
authorHidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Mon, 14 Dec 2015 10:19:09 +0000 (11:19 +0100)
committerThomas Gleixner <tglx@linutronix.de>
Sat, 19 Dec 2015 10:07:00 +0000 (11:07 +0100)
If a panic on NMI happens just after panic() on the same CPU, panic() is
called recursively. As a result, the kernel stalls after failing to
acquire panic_lock.

To avoid this problem, don't call panic() in NMI context if we've
already entered panic().

For that, introduce nmi_panic() macro to reduce code duplication. In
the case of panic on NMI, don't return from NMI handlers if another CPU
already panicked.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Gobinda Charan Maji <gobinda.cemk07@gmail.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Javi Merino <javi.merino@arm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: kexec@lists.infradead.org
Cc: linux-doc@vger.kernel.org
Cc: lkml <linux-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Link: http://lkml.kernel.org/r/20151210014626.25437.13302.stgit@softrs
[ Cleanup comments, fixup formatting. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/nmi.c
include/linux/kernel.h
kernel/panic.c
kernel/watchdog.c

index 697f90db0e37db9bbde8f076c998ccaed8e9c499..fca87938d739a15e1828e53e6cc706902bad4662 100644 (file)
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
 #endif
 
        if (panic_on_unrecovered_nmi)
-               panic("NMI: Not continuing");
+               nmi_panic("NMI: Not continuing");
 
        pr_emerg("Dazed and confused, but trying to continue\n");
 
@@ -255,8 +255,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
                 reason, smp_processor_id());
        show_regs(regs);
 
-       if (panic_on_io_nmi)
-               panic("NMI IOCK error: Not continuing");
+       if (panic_on_io_nmi) {
+               nmi_panic("NMI IOCK error: Not continuing");
+
+               /*
+                * If we end up here, it means we have received an NMI while
+                * processing panic(). Simply return without delaying and
+                * re-enabling NMIs.
+                */
+               return;
+       }
 
        /* Re-enable the IOCK line, wait for a few seconds */
        reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
@@ -297,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 
        pr_emerg("Do you have a strange power saving mode enabled?\n");
        if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-               panic("NMI: Not continuing");
+               nmi_panic("NMI: Not continuing");
 
        pr_emerg("Dazed and confused, but trying to continue\n");
 }
index 350dfb08aee36bdce54c8ecf683dd7afed7b46ab..750cc5c7c99980e6ea32fc61967ee9f3990f37c2 100644 (file)
@@ -445,6 +445,26 @@ extern int sysctl_panic_on_stackoverflow;
 
 extern bool crash_kexec_post_notifiers;
 
+/*
+ * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
+ * holds a CPU number which is executing panic() currently. A value of
+ * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
+ */
+extern atomic_t panic_cpu;
+#define PANIC_CPU_INVALID      -1
+
+/*
+ * A variant of panic() called from NMI context. We return if we've already
+ * panicked on this CPU.
+ */
+#define nmi_panic(fmt, ...)                                            \
+do {                                                                   \
+       int cpu = raw_smp_processor_id();                               \
+                                                                       \
+       if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu)  \
+               panic(fmt, ##__VA_ARGS__);                              \
+} while (0)
+
 /*
  * Only to be used by arch init code. If the user over-wrote the default
  * CONFIG_PANIC_TIMEOUT, honor it.
index 4b150bc0c6c111ee09f783eaa6aa101339f6ceec..3344524cf6ffd658c901b383af317aa095e4cff0 100644 (file)
@@ -61,6 +61,8 @@ void __weak panic_smp_self_stop(void)
                cpu_relax();
 }
 
+atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
+
 /**
  *     panic - halt the system
  *     @fmt: The text string to print
@@ -71,17 +73,17 @@ void __weak panic_smp_self_stop(void)
  */
 void panic(const char *fmt, ...)
 {
-       static DEFINE_SPINLOCK(panic_lock);
        static char buf[1024];
        va_list args;
        long i, i_next = 0;
        int state = 0;
+       int old_cpu, this_cpu;
 
        /*
         * Disable local interrupts. This will prevent panic_smp_self_stop
         * from deadlocking the first cpu that invokes the panic, since
         * there is nothing to prevent an interrupt handler (that runs
-        * after the panic_lock is acquired) from invoking panic again.
+        * after setting panic_cpu) from invoking panic() again.
         */
        local_irq_disable();
 
@@ -94,8 +96,16 @@ void panic(const char *fmt, ...)
         * multiple parallel invocations of panic, all other CPUs either
         * stop themself or will wait until they are stopped by the 1st CPU
         * with smp_send_stop().
+        *
+        * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
+        * comes here, so go ahead.
+        * `old_cpu == this_cpu' means we came from nmi_panic() which sets
+        * panic_cpu to this CPU.  In this case, this is also the 1st CPU.
         */
-       if (!spin_trylock(&panic_lock))
+       this_cpu = raw_smp_processor_id();
+       old_cpu  = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
+
+       if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
                panic_smp_self_stop();
 
        console_verbose();
index 18f34cf75f741e2a63db6dc6c522dfa2e03d1a54..b9be18fae15455f7483c0fba4f9ada15f232ff7d 100644 (file)
@@ -351,7 +351,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
                        trigger_allbutself_cpu_backtrace();
 
                if (hardlockup_panic)
-                       panic("Hard LOCKUP");
+                       nmi_panic("Hard LOCKUP");
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;