PM: Restrict swap use to later in the suspend sequence
author Mario Limonciello <mario.limonciello@amd.com>
Fri, 13 Jun 2025 21:43:44 +0000 (16:43 -0500)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thu, 26 Jun 2025 18:39:34 +0000 (20:39 +0200)
Currently swap is restricted before drivers have had a chance to do
their prepare() PM callbacks. Restricting swap this early means that if
a driver needs to evict some content from memory into swap in its
prepare callback, it won't be able to.

On AMD dGPUs this can lead to failed suspends under memory pressure
situations as all VRAM must be evicted to system memory or swap.

Move the swap restriction to right after all devices have had a chance
to do the prepare() callback.  If there is any problem with the sequence,
restore swap in the appropriate dpm resume callbacks or error handling
paths.

Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Nat Wittstock <nat@fardog.io>
Tested-by: Lucian Langa <lucilanga@7pot.org>
Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/base/power/main.c
include/linux/suspend.h
kernel/kexec_core.c
kernel/power/hibernate.c
kernel/power/power.h
kernel/power/suspend.c

index eebe699fdf4f6c744d481d570ed9bac66a36da71..bf77d28e959fd3d6d74ba1aa55d86bce12877dc5 100644 (file)
@@ -1236,6 +1236,7 @@ void dpm_complete(pm_message_t state)
  */
 void dpm_resume_end(pm_message_t state)
 {
+       pm_restore_gfp_mask();
        dpm_resume(state);
        dpm_complete(state);
 }
@@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state)
        error = dpm_prepare(state);
        if (error)
                dpm_save_failed_step(SUSPEND_PREPARE);
-       else
+       else {
+               pm_restrict_gfp_mask();
                error = dpm_suspend(state);
+       }
 
        dpm_show_time(starttime, state, error, "start");
        return error;
index b1c76c8f2c8220a65f2c94dfc9f1561c7df8ca41..6a3f92098872038dc12417543566c935a1a9953a 100644 (file)
@@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
 extern void ksys_sync_helper(void);
 extern void pm_report_hw_sleep_time(u64 t);
 extern void pm_report_max_hw_sleep(u64 t);
+void pm_restrict_gfp_mask(void);
+void pm_restore_gfp_mask(void);
 
 #define pm_notifier(fn, pri) {                         \
        static struct notifier_block fn##_nb =                  \
@@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 static inline void pm_report_hw_sleep_time(u64 t) {};
 static inline void pm_report_max_hw_sleep(u64 t) {};
 
+static inline void pm_restrict_gfp_mask(void) {}
+static inline void pm_restore_gfp_mask(void) {}
+
 static inline void ksys_sync_helper(void) {}
 
 #define pm_notifier(fn, pri)   do { (void)(fn); } while (0)
index 9c59fa480b0b6fa0d1a0ee454671e8da337d7a62..3a9a9f240dbc9653db29c7cc2a6209e8bb70b394 100644 (file)
@@ -1136,6 +1136,7 @@ int kernel_kexec(void)
  Resume_devices:
                dpm_resume_end(PMSG_RESTORE);
  Resume_console:
+               pm_restore_gfp_mask();
                console_resume_all();
                thaw_processes();
  Restore_console:
index 519fb09de5e0cf3a99640e6d73833ed876a235b4..9216e3b91d3b3bfdabdbf161da10ee7d6ec74c46 100644 (file)
@@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode)
        }
 
        console_suspend_all();
-       pm_restrict_gfp_mask();
 
        error = dpm_suspend(PMSG_FREEZE);
 
@@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode)
 
        pm_prepare_console();
        console_suspend_all();
-       pm_restrict_gfp_mask();
        error = dpm_suspend_start(PMSG_QUIESCE);
        if (!error) {
                error = resume_target_kernel(platform_mode);
@@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode)
                BUG_ON(!error);
        }
        dpm_resume_end(PMSG_RECOVER);
-       pm_restore_gfp_mask();
        console_resume_all();
        pm_restore_console();
        return error;
index cb1d7156200204bd2ab25581ee44747fccd3727d..7ccd709af93f5fe123bc8c18e022c2900e991ae1 100644 (file)
@@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {}
 /* kernel/power/main.c */
 extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down);
 extern int pm_notifier_call_chain(unsigned long val);
-void pm_restrict_gfp_mask(void);
-void pm_restore_gfp_mask(void);
-#else
-static inline void pm_restrict_gfp_mask(void) {}
-static inline void pm_restore_gfp_mask(void) {}
 #endif
 
 #ifdef CONFIG_HIGHMEM
index 76b141b9aac0196fe7527454fef0ec6ff43892b8..bb608b68fb3013815b30938110cbf19f39a22504 100644 (file)
@@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state)
        return error;
 
  Recover_platform:
+       pm_restore_gfp_mask();
        platform_recover(state);
        goto Resume_devices;
 }
@@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state)
 
        trace_suspend_resume(TPS("suspend_enter"), state, false);
        pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
-       pm_restrict_gfp_mask();
        error = suspend_devices_and_enter(state);
-       pm_restore_gfp_mask();
 
  Finish:
        events_check_enabled = false;