Revert "sched/numa: add statistics of numa balance task"
author Chen Yu <yu.c.chen@intel.com>
Fri, 4 Jul 2025 13:56:20 +0000 (21:56 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 10 Jul 2025 04:07:56 +0000 (21:07 -0700)
This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382.

The reverted commit introduced per-memcg/task NUMA balance statistics, but
unfortunately it also introduced a NULL pointer dereference due to the
following race condition: After a swap task candidate was chosen, its
mm_struct pointer was set to NULL because the task exited.  Later, when
performing the actual task swap, dereferencing p->mm triggered the crash.

CPU0                                   CPU1
:
...
task_numa_migrate
     task_numa_find_cpu
      task_numa_compare
        # a normal task p is chosen
        env->best_task = p

                                          # p exit:
                                          exit_signals(p);
                                             p->flags |= PF_EXITING
                                          exit_mm
                                             p->mm = NULL;

      migrate_swap_stop
        __migrate_swap_task(arg->src_task, arg->dst_cpu)
         count_memcg_event_mm(p->mm, NUMA_TASK_SWAP) # p->mm is NULL

Preventing this would require holding task_lock() and checking the
PF_EXITING flag.  After discussion, the conclusion was that adding a lock
is not worthwhile just for some statistics calculations.  Revert the
change and rely on the tracepoint for this purpose instead.

Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com
Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org
Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reported-by: Jirka Hladky <jhladky@redhat.com>
Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/
Reported-by: Srikanth Aithal <Srikanth.Aithal@amd.com>
Reported-by: Suneeth D <Suneeth.D@amd.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Hladky <jhladky@redhat.com>
Cc: Libo Chen <libo.chen@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/admin-guide/cgroup-v2.rst
include/linux/sched.h
include/linux/vm_event_item.h
kernel/sched/core.c
kernel/sched/debug.c
mm/memcontrol.c
mm/vmstat.c

index 0cc35a14afbe0b5dc4a45a6ae3e91b49daf41ef9..bd98ea3175ec1234a3faca2be616fd79d2c4726b 100644 (file)
@@ -1732,12 +1732,6 @@ The following nested keys are defined.
          numa_hint_faults (npn)
                Number of NUMA hinting faults.
 
-         numa_task_migrated (npn)
-               Number of task migration by NUMA balancing.
-
-         numa_task_swapped (npn)
-               Number of task swap by NUMA balancing.
-
          pgdemote_kswapd
                Number of pages demoted by kswapd.
 
index 4f78a64beb52c425e46c66427b2f608ba30b3d99..aa9c5be7a632592a8e63516b924806d53a15ea42 100644 (file)
@@ -548,10 +548,6 @@ struct sched_statistics {
        u64                             nr_failed_migrations_running;
        u64                             nr_failed_migrations_hot;
        u64                             nr_forced_migrations;
-#ifdef CONFIG_NUMA_BALANCING
-       u64                             numa_task_migrated;
-       u64                             numa_task_swapped;
-#endif
 
        u64                             nr_wakeups;
        u64                             nr_wakeups_sync;
index 91a3ce9a2687e2c3cf9384948bce65c0c4b9c296..9e15a088ba38e2d3864ea7bddf486e87a1ff4410 100644 (file)
@@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                NUMA_HINT_FAULTS,
                NUMA_HINT_FAULTS_LOCAL,
                NUMA_PAGE_MIGRATE,
-               NUMA_TASK_MIGRATE,
-               NUMA_TASK_SWAP,
 #endif
 #ifdef CONFIG_MIGRATION
                PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
index ec68fc686bd74292e0edba3b0229f5db590ab038..81c6df746df17b88b1d5bc5a61c5c9d98e07084f 100644 (file)
@@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 #ifdef CONFIG_NUMA_BALANCING
 static void __migrate_swap_task(struct task_struct *p, int cpu)
 {
-       __schedstat_inc(p->stats.numa_task_swapped);
-       count_vm_numa_event(NUMA_TASK_SWAP);
-       count_memcg_event_mm(p->mm, NUMA_TASK_SWAP);
-
        if (task_on_rq_queued(p)) {
                struct rq *src_rq, *dst_rq;
                struct rq_flags srf, drf;
@@ -7939,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu)
        if (!cpumask_test_cpu(target_cpu, p->cpus_ptr))
                return -EINVAL;
 
-       __schedstat_inc(p->stats.numa_task_migrated);
-       count_vm_numa_event(NUMA_TASK_MIGRATE);
-       count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE);
+       /* TODO: This is not properly updating schedstats */
+
        trace_sched_move_numa(p, curr_cpu, target_cpu);
        return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg);
 }
index 9d71baf08075154c8930070690ae1c4a0d051d2e..557246880a7e0839277df662703b7bfabeb3a497 100644 (file)
@@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
                P_SCHEDSTAT(nr_failed_migrations_running);
                P_SCHEDSTAT(nr_failed_migrations_hot);
                P_SCHEDSTAT(nr_forced_migrations);
-#ifdef CONFIG_NUMA_BALANCING
-               P_SCHEDSTAT(numa_task_migrated);
-               P_SCHEDSTAT(numa_task_swapped);
-#endif
                P_SCHEDSTAT(nr_wakeups);
                P_SCHEDSTAT(nr_wakeups_sync);
                P_SCHEDSTAT(nr_wakeups_migrate);
index 902da8a9c643a070ca323804e557ae5173c688c5..70fdeda1120b3ef3ac40504dfcc04271342ec12f 100644 (file)
@@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = {
        NUMA_PAGE_MIGRATE,
        NUMA_PTE_UPDATES,
        NUMA_HINT_FAULTS,
-       NUMA_TASK_MIGRATE,
-       NUMA_TASK_SWAP,
 #endif
 };
 
index 429ae5339bfef2ab86ec480d4cec48d924a0243f..a78d70ddeacd686ce4cc2caf2ed76556abbda283 100644 (file)
@@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = {
        "numa_hint_faults",
        "numa_hint_faults_local",
        "numa_pages_migrated",
-       "numa_task_migrated",
-       "numa_task_swapped",
 #endif
 #ifdef CONFIG_MIGRATION
        "pgmigrate_success",