workqueue: add cmdline parameter workqueue.panic_on_stall
author Sangmoon Kim <sangmoon.kim@samsung.com>
Tue, 6 Aug 2024 05:12:09 +0000 (14:12 +0900)
committer Tejun Heo <tj@kernel.org>
Tue, 6 Aug 2024 20:06:31 +0000 (10:06 -1000)
When we want to debug a workqueue stall, we can trigger a panic
immediately to get the information we want.

In some systems, it may be necessary to quickly reboot the system to
escape from a workqueue lockup situation. In this case, we can control
the number of stall detections after which a panic is triggered.

workqueue.panic_on_stall sets the number of stall detections required
to trigger a panic. 0 disables the panic on stall.

Signed-off-by: Sangmoon Kim <sangmoon.kim@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/workqueue.c

index 1745ca788ede3e51ee09c3db11ff9e93ac81caf8..80cb0a923046ba9b20f04ed2c04429b040bfe6a8 100644 (file)
@@ -7398,6 +7398,9 @@ static struct timer_list wq_watchdog_timer;
 static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
 
+static unsigned int wq_panic_on_stall;
+module_param_named(panic_on_stall, wq_panic_on_stall, uint, 0644);
+
 /*
  * Show workers that might prevent the processing of pending work items.
  * The only candidates are CPU-bound workers in the running state.
@@ -7449,6 +7452,16 @@ static void show_cpu_pools_hogs(void)
        rcu_read_unlock();
 }
 
+static void panic_on_wq_watchdog(void)
+{
+       static unsigned int wq_stall;
+
+       if (wq_panic_on_stall) {
+               wq_stall++;
+               BUG_ON(wq_stall >= wq_panic_on_stall);
+       }
+}
+
 static void wq_watchdog_reset_touched(void)
 {
        int cpu;
@@ -7521,6 +7534,9 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
        if (cpu_pool_stall)
                show_cpu_pools_hogs();
 
+       if (lockup_detected)
+               panic_on_wq_watchdog();
+
        wq_watchdog_reset_touched();
        mod_timer(&wq_watchdog_timer, jiffies + thresh);
 }