diff --git a/kernel/smp.c b/kernel/smp.c
index 038d666f327bd309950a4f912414541750358c51..ab3e5dad6cfe9289fe347ab048ed69d3a3101b51 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
 #include <linux/sched/debug.h>
 #include <linux/jump_label.h>
 
+#include <trace/events/ipi.h>
+
 #include "smpboot.h"
 #include "sched/smp.h"
 
 #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)
 
-struct cfd_percpu {
-       call_single_data_t      csd;
-};
-
 struct call_function_data {
-       struct cfd_percpu       __percpu *pcpu;
+       call_single_data_t      __percpu *csd;
        cpumask_var_t           cpumask;
        cpumask_var_t           cpumask_ipi;
 };
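
This hunk and the related ones below replace the single-member struct cfd_percpu wrapper with a plain per-CPU call_single_data_t, removing one level of indirection on every access. A rough sketch of the resulting allocation and access pattern (kernel context assumed; the example_* helpers are illustrative and not part of the patch):

static int example_prepare(struct call_function_data *cfd)
{
        /* was: cfd->pcpu = alloc_percpu(struct cfd_percpu); */
        cfd->csd = alloc_percpu(call_single_data_t);
        if (!cfd->csd)
                return -ENOMEM;
        return 0;
}

/* was: &per_cpu_ptr(cfd->pcpu, cpu)->csd */
static call_single_data_t *example_csd(struct call_function_data *cfd, int cpu)
{
        return per_cpu_ptr(cfd->csd, cpu);
}

static void example_teardown(struct call_function_data *cfd)
{
        /* was: free_percpu(cfd->pcpu); */
        free_percpu(cfd->csd);
}
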
@@ -59,8 +57,8 @@ int smpcfd_prepare_cpu(unsigned int cpu)
                free_cpumask_var(cfd->cpumask);
                return -ENOMEM;
        }
-       cfd->pcpu = alloc_percpu(struct cfd_percpu);
-       if (!cfd->pcpu) {
+       cfd->csd = alloc_percpu(call_single_data_t);
+       if (!cfd->csd) {
                free_cpumask_var(cfd->cpumask);
                free_cpumask_var(cfd->cpumask_ipi);
                return -ENOMEM;
@@ -75,7 +73,7 @@ int smpcfd_dead_cpu(unsigned int cpu)
 
        free_cpumask_var(cfd->cpumask);
        free_cpumask_var(cfd->cpumask_ipi);
-       free_percpu(cfd->pcpu);
+       free_percpu(cfd->csd);
        return 0;
 }
 
@@ -105,6 +103,24 @@ void __init call_function_init(void)
        smpcfd_prepare_cpu(smp_processor_id());
 }
 
+static __always_inline void
+send_call_function_single_ipi(int cpu)
+{
+       if (call_function_single_prep_ipi(cpu)) {
+               trace_ipi_send_cpu(cpu, _RET_IP_,
+                                  generic_smp_call_function_single_interrupt);
+               arch_send_call_function_single_ipi(cpu);
+       }
+}
+
+static __always_inline void
+send_call_function_ipi_mask(struct cpumask *mask)
+{
+       trace_ipi_send_cpumask(mask, _RET_IP_,
+                              generic_smp_call_function_single_interrupt);
+       arch_send_call_function_ipi_mask(mask);
+}
+
 #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
 
 static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled);
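
The send_call_function_single_ipi() and send_call_function_ipi_mask() helpers added above are the one place where call-function IPIs leave this file, so every such IPI now fires the ipi:ipi_send_cpu or ipi:ipi_send_cpumask tracepoint from the newly included <trace/events/ipi.h>. The single-CPU path only sends (and traces) an interrupt when call_function_single_prep_ipi() says one is needed; if the target will notice the queued work anyway (e.g. a polling idle CPU), nothing goes out. On a tracing-enabled kernel the events can be switched on from tracefs like any other event group, typically under /sys/kernel/tracing/events/ipi/.
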
@@ -120,11 +136,16 @@ static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug
  */
 static int __init csdlock_debug(char *str)
 {
+       int ret;
        unsigned int val = 0;
 
-       get_option(&str, &val);
-       if (val)
-               static_branch_enable(&csdlock_debug_enabled);
+       ret = get_option(&str, &val);
+       if (ret) {
+               if (val)
+                       static_branch_enable(&csdlock_debug_enabled);
+               else
+                       static_branch_disable(&csdlock_debug_enabled);
+       }
 
        return 1;
 }
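
Previously the return value of get_option() was ignored here, so csdlock_debug=0 was a no-op and there was no way to turn the debugging off when CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT makes the static key above default-enabled. With this change a successfully parsed non-zero value enables the key, a parsed zero disables it, and a value that fails to parse leaves the key untouched.
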
@@ -302,9 +323,28 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 void __smp_call_single_queue(int cpu, struct llist_node *node)
 {
        /*
-        * The list addition should be visible before sending the IPI
-        * handler locks the list to pull the entry off it because of
-        * normal cache coherency rules implied by spinlocks.
+        * We have to check the type of the CSD before queueing it, because
+        * once queued it can have its flags cleared by
+        *   flush_smp_call_function_queue()
+        * even if we haven't sent the smp_call IPI yet (e.g. the stopper
+        * executes migration_cpu_stop() on the remote CPU).
+        */
+       if (trace_ipi_send_cpu_enabled()) {
+               call_single_data_t *csd;
+               smp_call_func_t func;
+
+               csd = container_of(node, call_single_data_t, node.llist);
+               func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
+                       sched_ttwu_pending : csd->func;
+
+               trace_ipi_send_cpu(cpu, _RET_IP_, func);
+       }
+
+       /*
+        * The list addition should be visible to the target CPU when it pops
+        * the head of the list to pull the entry off it in the IPI handler
+        * because of normal cache coherency rules implied by the underlying
+        * llist ops.
         *
         * If IPIs can go out of order to the cache coherency protocol
         * in an architecture, sufficient synchronisation should be added
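
In the hunk above, the callback reported by trace_ipi_send_cpu() has to be worked out before the node is published: once queued, the entry can be consumed and recycled by flush_smp_call_function_queue() on the target CPU at any time, even before this CPU gets around to sending the IPI (the stopper case in the comment). CSD_TYPE_TTWU nodes belong to the scheduler's wakeup list and carry no func of their own, so sched_ttwu_pending, the handler that will consume them, is reported instead. The whole block sits behind trace_ipi_send_cpu_enabled(), a static-key check generated for the tracepoint, so it adds effectively nothing while the event is disabled.
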
@@ -688,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
        int cpu, last_cpu, this_cpu = smp_processor_id();
        struct call_function_data *cfd;
        bool wait = scf_flags & SCF_WAIT;
+       int nr_cpus = 0, nr_queued = 0;
        bool run_remote = false;
        bool run_local = false;
-       int nr_cpus = 0;
 
        lockdep_assert_preemption_disabled();
 
@@ -730,10 +770,12 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
                cpumask_clear(cfd->cpumask_ipi);
                for_each_cpu(cpu, cfd->cpumask) {
-                       call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+                       call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
 
-                       if (cond_func && !cond_func(cpu, info))
+                       if (cond_func && !cond_func(cpu, info)) {
+                               __cpumask_clear_cpu(cpu, cfd->cpumask);
                                continue;
+                       }
 
                        csd_lock(csd);
                        if (wait)
@@ -749,8 +791,16 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                                nr_cpus++;
                                last_cpu = cpu;
                        }
+                       nr_queued++;
                }
 
+               /*
+                * Trace each smp_call_function_*() as an IPI; the actual IPIs
+                * will be traced with func==generic_smp_call_function_single_interrupt().
+                */
+               if (nr_queued)
+                       trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
+
                /*
                 * Choose the most efficient way to send an IPI. Note that the
                 * number of CPUs might be zero due to concurrent changes to the
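
The nr_queued bookkeeping and the trace_ipi_send_cpumask() call above record one event per smp_call_function_*() invocation, carrying the caller's func and the set of CPUs that actually had a CSD queued; clearing condition-rejected CPUs from cfd->cpumask in the earlier hunk is what keeps that mask honest. The interrupts that really go out are traced separately, via the send_call_function_*() wrappers, with generic_smp_call_function_single_interrupt() as the callback; the two views differ whenever a target CPU can be reached without raising an IPI.
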
@@ -759,7 +809,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                if (nr_cpus == 1)
                        send_call_function_single_ipi(last_cpu);
                else if (likely(nr_cpus > 1))
-                       arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
+                       send_call_function_ipi_mask(cfd->cpumask_ipi);
        }
 
        if (run_local && (!cond_func || cond_func(this_cpu, info))) {
@@ -774,7 +824,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
                for_each_cpu(cpu, cfd->cpumask) {
                        call_single_data_t *csd;
 
-                       csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd;
+                       csd = per_cpu_ptr(cfd->csd, cpu);
                        csd_lock_wait(csd);
                }
        }
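
Finally, a sketch of the sort of caller whose cross-calls become visible through these events; only on_each_cpu_cond_mask() and its signature come from <linux/smp.h>, the example_* names are made up for illustration:

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/smp.h>

/* Runs on every CPU selected by the condition below. */
static void example_poke(void *info)
{
        atomic_inc((atomic_t *)info);
}

/* Only queue work on odd-numbered CPUs. */
static bool example_cpu_is_odd(int cpu, void *info)
{
        return cpu & 1;
}

static void example_poke_odd_cpus(atomic_t *counter)
{
        /*
         * Ends up in smp_call_function_many_cond(): one ipi:ipi_send_cpumask
         * event with func == example_poke covering the CPUs that got work
         * queued, plus ipi events for the interrupts actually raised.
         */
        on_each_cpu_cond_mask(example_cpu_is_odd, example_poke, counter,
                              true, cpu_online_mask);
}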