Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / kernel / watchdog.c
index af37c040436c96dc85e04d409bcd56873108ef83..6bcb854909c0b6f1563469ae3517fa1f89c43268 100644 (file)
 
 static DEFINE_MUTEX(watchdog_mutex);
 
-int __read_mostly nmi_watchdog_enabled;
-
 #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
-                                               NMI_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT      (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT  1
 #else
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT      (SOFT_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT  0
 #endif
 
+unsigned long __read_mostly watchdog_enabled;
+int __read_mostly watchdog_user_enabled = 1;
+int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
+int __read_mostly soft_watchdog_user_enabled = 1;
+int __read_mostly watchdog_thresh = 10;
+int __read_mostly nmi_watchdog_available;
+
+struct cpumask watchdog_allowed_mask __read_mostly;
+
+struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-/* boot commands */
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
@@ -57,7 +67,7 @@ unsigned int __read_mostly hardlockup_panic =
  */
 void __init hardlockup_detector_disable(void)
 {
-       watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+       nmi_watchdog_user_enabled = 0;
 }
 
 static int __init hardlockup_panic_setup(char *str)
@@ -67,35 +77,24 @@ static int __init hardlockup_panic_setup(char *str)
        else if (!strncmp(str, "nopanic", 7))
                hardlockup_panic = 0;
        else if (!strncmp(str, "0", 1))
-               watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+               nmi_watchdog_user_enabled = 0;
        else if (!strncmp(str, "1", 1))
-               watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+               nmi_watchdog_user_enabled = 1;
        return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
 
-#endif
-
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-int __read_mostly soft_watchdog_enabled;
-#endif
-
-int __read_mostly watchdog_user_enabled;
-int __read_mostly watchdog_thresh = 10;
-
-#ifdef CONFIG_SMP
-int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+# ifdef CONFIG_SMP
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#endif
-struct cpumask watchdog_cpumask __read_mostly;
-unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
-/*
- * The 'watchdog_running' variable is set to 1 when the watchdog threads
- * are registered/started and is set to 0 when the watchdog threads are
- * unregistered/stopped, so it is an indicator whether the threads exist.
- */
-static int __read_mostly watchdog_running;
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+       sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
+       return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
+# endif /* CONFIG_SMP */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /*
  * These functions can be overridden if an architecture implements its
@@ -107,6 +106,7 @@ static int __read_mostly watchdog_running;
  */
 int __weak watchdog_nmi_enable(unsigned int cpu)
 {
+       hardlockup_detector_perf_enable();
        return 0;
 }
 
@@ -115,29 +115,59 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
        hardlockup_detector_perf_disable();
 }
 
-/*
- * watchdog_nmi_reconfigure can be implemented to be notified after any
- * watchdog configuration change. The arch hardlockup watchdog should
- * respond to the following variables:
- * - nmi_watchdog_enabled
+/* Return 0, if a NMI watchdog is available. Error code otherwise */
+int __weak __init watchdog_nmi_probe(void)
+{
+       return hardlockup_detector_perf_init();
+}
+
+/**
+ * watchdog_nmi_stop - Stop the watchdog for reconfiguration
+ *
+ * The reconfiguration steps are:
+ * watchdog_nmi_stop();
+ * update_variables();
+ * watchdog_nmi_start();
+ */
+void __weak watchdog_nmi_stop(void) { }
+
+/**
+ * watchdog_nmi_start - Start the watchdog after reconfiguration
+ *
+ * Counterpart to watchdog_nmi_stop().
+ *
+ * The following variables have been updated in update_variables() and
+ * contain the currently valid configuration:
+ * - watchdog_enabled
  * - watchdog_thresh
  * - watchdog_cpumask
- * - sysctl_hardlockup_all_cpu_backtrace
- * - hardlockup_panic
  */
-void __weak watchdog_nmi_reconfigure(void) { }
+void __weak watchdog_nmi_start(void) { }
 
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+/**
+ * lockup_detector_update_enable - Update the sysctl enable bit
+ *
+ * Caller needs to make sure that the NMI/perf watchdogs are off, so this
+ * can't race with watchdog_nmi_disable().
+ */
+static void lockup_detector_update_enable(void)
+{
+       watchdog_enabled = 0;
+       if (!watchdog_user_enabled)
+               return;
+       if (nmi_watchdog_available && nmi_watchdog_user_enabled)
+               watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+       if (soft_watchdog_user_enabled)
+               watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
+}
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-       for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 
 /* Global variables, exported for sysctl */
 unsigned int __read_mostly softlockup_panic =
                        CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-int __read_mostly soft_watchdog_enabled;
 
+static bool softlockup_threads_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -160,35 +190,27 @@ __setup("softlockup_panic=", softlockup_panic_setup);
 
 static int __init nowatchdog_setup(char *str)
 {
-       watchdog_enabled = 0;
+       watchdog_user_enabled = 0;
        return 1;
 }
 __setup("nowatchdog", nowatchdog_setup);
 
 static int __init nosoftlockup_setup(char *str)
 {
-       watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
+       soft_watchdog_user_enabled = 0;
        return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 
 #ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+
 static int __init softlockup_all_cpu_backtrace_setup(char *str)
 {
-       sysctl_softlockup_all_cpu_backtrace =
-               !!simple_strtol(str, NULL, 0);
+       sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
        return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int __init hardlockup_all_cpu_backtrace_setup(char *str)
-{
-       sysctl_hardlockup_all_cpu_backtrace =
-               !!simple_strtol(str, NULL, 0);
-       return 1;
-}
-__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
-#endif
 #endif
 
 static void __lockup_detector_cleanup(void);
@@ -263,11 +285,15 @@ void touch_all_softlockup_watchdogs(void)
        int cpu;
 
        /*
-        * this is done lockless
-        * do we care if a 0 races with a timestamp?
-        * all it means is the softlock check starts one cycle later
+        * watchdog_mutex cannpt be taken here, as this might be called
+        * from (soft)interrupt context, so the access to
+        * watchdog_allowed_cpumask might race with a concurrent update.
+        *
+        * The watchdog time stamp can race against a concurrent real
+        * update as well, the only side effect might be a cycle delay for
+        * the softlockup check.
         */
-       for_each_watchdog_cpu(cpu)
+       for_each_cpu(cpu, &watchdog_allowed_mask)
                per_cpu(watchdog_touch_ts, cpu) = 0;
        wq_watchdog_touch(-1);
 }
@@ -307,9 +333,6 @@ static void watchdog_interrupt_count(void)
        __this_cpu_inc(hrtimer_interrupts);
 }
 
-static int watchdog_enable_all_cpus(void);
-static void watchdog_disable_all_cpus(void);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -446,7 +469,8 @@ static void watchdog_enable(unsigned int cpu)
        /* Initialize timestamp */
        __touch_watchdog();
        /* Enable the perf event */
-       watchdog_nmi_enable(cpu);
+       if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
+               watchdog_nmi_enable(cpu);
 
        watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }
@@ -502,93 +526,81 @@ static struct smp_hotplug_thread watchdog_threads = {
        .unpark                 = watchdog_enable,
 };
 
-/*
- * park all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function returns an error if kthread_park() of a watchdog thread
- * fails. In this situation, the watchdog threads of some CPUs can already
- * be parked and the watchdog threads of other CPUs can still be runnable.
- * Callers are expected to handle this special condition as appropriate in
- * their context.
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static int watchdog_park_threads(void)
+static void softlockup_update_smpboot_threads(void)
 {
-       int cpu, ret = 0;
+       lockdep_assert_held(&watchdog_mutex);
 
-       for_each_watchdog_cpu(cpu) {
-               ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
-               if (ret)
-                       break;
-       }
-       return ret;
+       if (!softlockup_threads_initialized)
+               return;
+
+       smpboot_update_cpumask_percpu_thread(&watchdog_threads,
+                                            &watchdog_allowed_mask);
 }
 
-/*
- * unpark all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static void watchdog_unpark_threads(void)
+/* Temporarily park all watchdog threads */
+static void softlockup_park_all_threads(void)
 {
-       int cpu;
-
-       for_each_watchdog_cpu(cpu)
-               kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+       cpumask_clear(&watchdog_allowed_mask);
+       softlockup_update_smpboot_threads();
 }
 
-static int update_watchdog_all_cpus(void)
+/* Unpark enabled threads */
+static void softlockup_unpark_threads(void)
 {
-       int ret;
-
-       ret = watchdog_park_threads();
-       if (ret)
-               return ret;
-
-       watchdog_unpark_threads();
-
-       return 0;
+       cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+       softlockup_update_smpboot_threads();
 }
 
-static int watchdog_enable_all_cpus(void)
+static void lockup_detector_reconfigure(void)
 {
-       int err = 0;
-
-       if (!watchdog_running) {
-               err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-                                                            &watchdog_cpumask);
-               if (err)
-                       pr_err("Failed to create watchdog threads, disabled\n");
-               else
-                       watchdog_running = 1;
-       } else {
-               /*
-                * Enable/disable the lockup detectors or
-                * change the sample period 'on the fly'.
-                */
-               err = update_watchdog_all_cpus();
+       cpus_read_lock();
+       watchdog_nmi_stop();
+       softlockup_park_all_threads();
+       set_sample_period();
+       lockup_detector_update_enable();
+       if (watchdog_enabled && watchdog_thresh)
+               softlockup_unpark_threads();
+       watchdog_nmi_start();
+       cpus_read_unlock();
+       /*
+        * Must be called outside the cpus locked section to prevent
+        * recursive locking in the perf code.
+        */
+       __lockup_detector_cleanup();
+}
 
-               if (err) {
-                       watchdog_disable_all_cpus();
-                       pr_err("Failed to update lockup detectors, disabled\n");
-               }
-       }
+/*
+ * Create the watchdog thread infrastructure and configure the detector(s).
+ *
+ * The threads are not unparked as watchdog_allowed_mask is empty.  When
+ * the threads are sucessfully initialized, take the proper locks and
+ * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
+ */
+static __init void lockup_detector_setup(void)
+{
+       int ret;
 
-       if (err)
-               watchdog_enabled = 0;
+       /*
+        * If sysctl is off and watchdog got disabled on the command line,
+        * nothing to do here.
+        */
+       lockup_detector_update_enable();
 
-       return err;
-}
+       if (!IS_ENABLED(CONFIG_SYSCTL) &&
+           !(watchdog_enabled && watchdog_thresh))
+               return;
 
-static void watchdog_disable_all_cpus(void)
-{
-       if (watchdog_running) {
-               watchdog_running = 0;
-               smpboot_unregister_percpu_thread(&watchdog_threads);
+       ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+                                                    &watchdog_allowed_mask);
+       if (ret) {
+               pr_err("Failed to initialize soft lockup detector threads\n");
+               return;
        }
+
+       mutex_lock(&watchdog_mutex);
+       softlockup_threads_initialized = true;
+       lockup_detector_reconfigure();
+       mutex_unlock(&watchdog_mutex);
 }
 
 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
@@ -596,7 +608,18 @@ static inline int watchdog_park_threads(void) { return 0; }
 static inline void watchdog_unpark_threads(void) { }
 static inline int watchdog_enable_all_cpus(void) { return 0; }
 static inline void watchdog_disable_all_cpus(void) { }
-static inline void set_sample_period(void) { }
+static void lockup_detector_reconfigure(void)
+{
+       cpus_read_lock();
+       watchdog_nmi_stop();
+       lockup_detector_update_enable();
+       watchdog_nmi_start();
+       cpus_read_unlock();
+}
+static inline void lockup_detector_setup(void)
+{
+       lockup_detector_reconfigure();
+}
 #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
 
 static void __lockup_detector_cleanup(void)
@@ -630,101 +653,47 @@ void lockup_detector_soft_poweroff(void)
 
 #ifdef CONFIG_SYSCTL
 
-/*
- * Update the run state of the lockup detectors.
- */
-static int proc_watchdog_update(void)
+/* Propagate any changes to the watchdog threads */
+static void proc_watchdog_update(void)
 {
-       int err = 0;
-
-       /*
-        * Watchdog threads won't be started if they are already active.
-        * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
-        * care of this. If those threads are already active, the sample
-        * period will be updated and the lockup detectors will be enabled
-        * or disabled 'on the fly'.
-        */
-       if (watchdog_enabled && watchdog_thresh)
-               err = watchdog_enable_all_cpus();
-       else
-               watchdog_disable_all_cpus();
-
-       watchdog_nmi_reconfigure();
-
-       __lockup_detector_cleanup();
-
-       return err;
-
+       /* Remove impossible cpus to keep sysctl output clean. */
+       cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+       lockup_detector_reconfigure();
 }
 
 /*
  * common function for watchdog, nmi_watchdog and soft_watchdog parameter
  *
- * caller             | table->data points to | 'which' contains the flag(s)
- * -------------------|-----------------------|-----------------------------
- * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
- *                    |                       | with SOFT_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
+ * caller             | table->data points to      | 'which'
+ * -------------------|----------------------------|--------------------------
+ * proc_watchdog      | watchdog_user_enabled      | NMI_WATCHDOG_ENABLED |
+ *                    |                            | SOFT_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_nmi_watchdog  | nmi_watchdog_user_enabled  | NMI_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
  */
 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       int err, old, new;
-       int *watchdog_param = (int *)table->data;
+       int err, old, *param = table->data;
 
-       cpu_hotplug_disable();
        mutex_lock(&watchdog_mutex);
 
-       /*
-        * If the parameter is being read return the state of the corresponding
-        * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
-        * run state of the lockup detectors.
-        */
        if (!write) {
-               *watchdog_param = (watchdog_enabled & which) != 0;
+               /*
+                * On read synchronize the userspace interface. This is a
+                * racy snapshot.
+                */
+               *param = (watchdog_enabled & which) != 0;
                err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        } else {
+               old = READ_ONCE(*param);
                err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-               if (err)
-                       goto out;
-
-               /*
-                * There is a race window between fetching the current value
-                * from 'watchdog_enabled' and storing the new value. During
-                * this race window, watchdog_nmi_enable() can sneak in and
-                * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
-                * The 'cmpxchg' detects this race and the loop retries.
-                */
-               do {
-                       old = watchdog_enabled;
-                       /*
-                        * If the parameter value is not zero set the
-                        * corresponding bit(s), else clear it(them).
-                        */
-                       if (*watchdog_param)
-                               new = old | which;
-                       else
-                               new = old & ~which;
-               } while (cmpxchg(&watchdog_enabled, old, new) != old);
-
-               /*
-                * Update the run state of the lockup detectors. There is _no_
-                * need to check the value returned by proc_watchdog_update()
-                * and to restore the previous value of 'watchdog_enabled' as
-                * both lockup detectors are disabled if proc_watchdog_update()
-                * returns an error.
-                */
-               if (old == new)
-                       goto out;
-
-               err = proc_watchdog_update();
+               if (!err && old != READ_ONCE(*param))
+                       proc_watchdog_update();
        }
-out:
        mutex_unlock(&watchdog_mutex);
-       cpu_hotplug_enable();
        return err;
 }
 
@@ -744,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write,
 int proc_nmi_watchdog(struct ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+       if (!nmi_watchdog_available && write)
+               return -ENOTSUPP;
        return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
                                    table, write, buffer, lenp, ppos);
 }
@@ -764,45 +735,20 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
 int proc_watchdog_thresh(struct ctl_table *table, int write,
                         void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       int err, old, new;
+       int err, old;
 
-       cpu_hotplug_disable();
        mutex_lock(&watchdog_mutex);
 
-       old = ACCESS_ONCE(watchdog_thresh);
+       old = READ_ONCE(watchdog_thresh);
        err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
-       if (err || !write)
-               goto out;
+       if (!err && write && old != READ_ONCE(watchdog_thresh))
+               proc_watchdog_update();
 
-       /*
-        * Update the sample period. Restore on failure.
-        */
-       new = ACCESS_ONCE(watchdog_thresh);
-       if (old == new)
-               goto out;
-
-       set_sample_period();
-       err = proc_watchdog_update();
-       if (err) {
-               watchdog_thresh = old;
-               set_sample_period();
-       }
-out:
        mutex_unlock(&watchdog_mutex);
-       cpu_hotplug_enable();
        return err;
 }
 
-static int watchdog_update_cpus(void)
-{
-       if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR)) {
-               return smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-                                                           &watchdog_cpumask);
-       }
-       return 0;
-}
-
 /*
  * The cpumask is the mask of possible cpus that the watchdog can run
  * on, not the mask of cpus it is actually running on.  This allows the
@@ -814,40 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 {
        int err;
 
-       cpu_hotplug_disable();
        mutex_lock(&watchdog_mutex);
 
        err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
-       if (!err && write) {
-               /* Remove impossible cpus to keep sysctl output cleaner. */
-               cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
-                           cpu_possible_mask);
-
-               if (watchdog_running) {
-                       /*
-                        * Failure would be due to being unable to allocate
-                        * a temporary cpumask, so we are likely not in a
-                        * position to do much else to make things better.
-                        */
-                       if (watchdog_update_cpus() != 0)
-                               pr_err("cpumask update failed\n");
-               }
-
-               watchdog_nmi_reconfigure();
-               __lockup_detector_cleanup();
-       }
+       if (!err && write)
+               proc_watchdog_update();
 
        mutex_unlock(&watchdog_mutex);
-       cpu_hotplug_enable();
        return err;
 }
-
 #endif /* CONFIG_SYSCTL */
 
 void __init lockup_detector_init(void)
 {
-       set_sample_period();
-
 #ifdef CONFIG_NO_HZ_FULL
        if (tick_nohz_full_enabled()) {
                pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -858,6 +783,7 @@ void __init lockup_detector_init(void)
        cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
 #endif
 
-       if (watchdog_enabled)
-               watchdog_enable_all_cpus();
+       if (!watchdog_nmi_probe())
+               nmi_watchdog_available = true;
+       lockup_detector_setup();
 }