watchdog/softlockup: Report the most frequent interrupts
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index ef8ebd31fdab7f8b484df48dde7e09d4700e0e51..d12ff74889ed54a0247506eb5f3cb24faf7dee6a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
 
 #define pr_fmt(fmt) "watchdog: " fmt
 
-#include <linux/mm.h>
 #include <linux/cpu.h>
-#include <linux/nmi.h>
 #include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
 #include <linux/kernel_stat.h>
+#include <linux/kvm_para.h>
 #include <linux/math64.h>
+#include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/stop_machine.h>
 #include <linux/sysctl.h>
 #include <linux/tick.h>
+
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
-#include <linux/stop_machine.h>
 
 #include <asm/irq_regs.h>
-#include <linux/kvm_para.h>
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -418,13 +421,105 @@ static void print_cpustat(void)
        }
 }
 
+#define HARDIRQ_PERCENT_THRESH          50
+#define NUM_HARDIRQ_REPORT              5
+struct irq_counts {
+       int irq;
+       u32 counts;
+};
+
+static DEFINE_PER_CPU(bool, snapshot_taken);
+
+/* Tabulate the most frequent interrupts. */
+static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank)
+{
+       int i;
+       struct irq_counts new_count = {irq, counts};
+
+       for (i = 0; i < rank; i++) {
+               if (counts > irq_counts[i].counts)
+                       swap(new_count, irq_counts[i]);
+       }
+}
+
+/*
+ * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period,
+ * then the cause of softlockup might be interrupt storm. In this case, it
+ * would be useful to start interrupt counting.
+ */
+static bool need_counting_irqs(void)
+{
+       u8 util;
+       int tail = __this_cpu_read(cpustat_tail);
+
+       tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS;
+       util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
+       return util > HARDIRQ_PERCENT_THRESH;
+}
+
+static void start_counting_irqs(void)
+{
+       if (!__this_cpu_read(snapshot_taken)) {
+               kstat_snapshot_irqs();
+               __this_cpu_write(snapshot_taken, true);
+       }
+}
+
+static void stop_counting_irqs(void)
+{
+       __this_cpu_write(snapshot_taken, false);
+}
+
+static void print_irq_counts(void)
+{
+       unsigned int i, count;
+       struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = {
+               {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}
+       };
+
+       if (__this_cpu_read(snapshot_taken)) {
+               for_each_active_irq(i) {
+                       count = kstat_get_irq_since_snapshot(i);
+                       tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT);
+               }
+
+               /*
+                * Prefixing every line with "watchdog:" would be redundant; the
+                * preceding alarm message is enough to locate this report in the
+                * logs, hence printk() is used here instead of pr_crit().
+                */
+               printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n",
+                      smp_processor_id(), HARDIRQ_PERCENT_THRESH);
+
+               for (i = 0; i < NUM_HARDIRQ_REPORT; i++) {
+                       if (irq_counts_sorted[i].irq == -1)
+                               break;
+
+                       printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n",
+                              i + 1, irq_counts_sorted[i].counts,
+                              irq_counts_sorted[i].irq);
+               }
+
+               /*
+                * If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last
+                * sample_period, then we suspect the interrupt storm might be subsiding.
+                */
+               if (!need_counting_irqs())
+                       stop_counting_irqs();
+       }
+}
+
 static void report_cpu_status(void)
 {
        print_cpustat();
+       print_irq_counts();
 }
 #else
 static inline void update_cpustat(void) { }
 static inline void report_cpu_status(void) { }
+static inline bool need_counting_irqs(void) { return false; }
+static inline void start_counting_irqs(void) { }
+static inline void stop_counting_irqs(void) { }
 #endif
 
 /*
@@ -528,6 +623,18 @@ static int is_softlockup(unsigned long touch_ts,
                         unsigned long now)
 {
        if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
+               /*
+                * If period_ts has not been updated during a sample_period, then
+                * in the subsequent few sample_periods, period_ts might also not
+                * be updated, which could indicate a potential softlockup. In
+                * this case, if we suspect the cause of the potential softlockup
+                * might be interrupt storm, then we need to count the interrupts
+                * to find which interrupt is storming.
+                */
+               if (time_after_eq(now, period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS) &&
+                   need_counting_irqs())
+                       start_counting_irqs();
+
                /* Warn about unreasonable delays. */
                if (time_after(now, period_ts + get_softlockup_thresh()))
                        return now - touch_ts;
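
The early check above fires one sample period before the full softlockup threshold: period_ts is normally refreshed every sample_period, so once it is a whole sample_period stale and the previous window was hardirq-heavy, interrupt counting starts early enough to cover most of the stall. The user-space sketch below is only an illustration of the arithmetic: it assumes the usual defaults (watchdog_thresh = 10, get_softlockup_thresh() returning 2 * watchdog_thresh seconds, NUM_SAMPLE_PERIODS = 5), uses plain comparisons instead of time_after*(), and ignores the extra hardirq-utilization condition from need_counting_irqs().

/* Standalone sketch, not kernel code: timestamps are invented for illustration. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_SAMPLE_PERIODS	5

static const int watchdog_thresh = 10;	/* assumed default, in seconds */

static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;	/* 20 seconds with the default threshold */
}

int main(void)
{
	unsigned long period_ts = 100;	/* last watchdog update, in seconds */
	unsigned long now;

	for (now = period_ts; now <= period_ts + 24; now += 4) {
		/* Mirrors time_after_eq(now, period_ts + thresh / NUM_SAMPLE_PERIODS). */
		bool count_irqs = now >= period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS;
		/* Mirrors time_after(now, period_ts + thresh). */
		bool report = now > period_ts + get_softlockup_thresh();

		printf("t=+%2lus  start_counting_irqs=%d  report_softlockup=%d\n",
		       now - period_ts, count_irqs, report);
	}
	return 0;
}

With those values, counting starts 4 seconds after period_ts stops advancing, while the softlockup report itself only triggers after 20 seconds, so the interrupt snapshot printed by print_irq_counts() spans most of the stall window.
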
@@ -550,6 +657,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
 static int softlockup_fn(void *data)
 {
        update_touch_ts();
+       stop_counting_irqs();
        complete(this_cpu_ptr(&softlockup_completion));
 
        return 0;
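
For reference, the top-N selection done by tabulate_irq_count() can be exercised outside the kernel. The sketch below is a user-space reimplementation under local stand-ins: a plain swap() macro in place of the kernel's, and made-up per-IRQ deltas in place of kstat_get_irq_since_snapshot(). Because irq_counts_sorted[] always stays sorted in descending order, comparing each slot against the incoming counts value is enough to insert the new entry, shift the smaller ones down, and drop the smallest.

/* Standalone sketch, not kernel code: swap() and the sample data are local stand-ins. */
#include <stdio.h>

#define NUM_HARDIRQ_REPORT	5
#define swap(a, b)	do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct irq_counts {
	int irq;
	unsigned int counts;
};

/* Same shape as the kernel helper: bubble the new entry into a descending top-N array. */
static void tabulate_irq_count(struct irq_counts *irq_counts, int irq,
			       unsigned int counts, int rank)
{
	int i;
	struct irq_counts new_count = { irq, counts };

	for (i = 0; i < rank; i++) {
		if (counts > irq_counts[i].counts)
			swap(new_count, irq_counts[i]);
	}
}

int main(void)
{
	struct irq_counts sorted[NUM_HARDIRQ_REPORT] = {
		{ -1, 0 }, { -1, 0 }, { -1, 0 }, { -1, 0 }, { -1, 0 }
	};
	/* Made-up deltas standing in for kstat_get_irq_since_snapshot(). */
	const struct irq_counts samples[] = {
		{ 11, 12 }, { 29, 998244 }, { 35, 3 }, { 47, 52012 },
		{ 64, 941 }, { 78, 52013 }, { 90, 0 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		tabulate_irq_count(sorted, samples[i].irq, samples[i].counts,
				   NUM_HARDIRQ_REPORT);

	for (i = 0; i < NUM_HARDIRQ_REPORT; i++)
		printf("#%u: %-10u\tirq#%d\n", i + 1, sorted[i].counts, sorted[i].irq);
	return 0;
}

Feeding the sample deltas through yields the five largest counts in descending order, in the same "#n: count irq#x" shape that print_irq_counts() emits; slots that no interrupt beats keep the -1 sentinel, which is what terminates the report loop early when fewer than NUM_HARDIRQ_REPORT interrupts fired.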