crypto: ccp: Add panic notifier for SEV/SNP firmware shutdown on kdump
author Ashish Kalra <ashish.kalra@amd.com>
Fri, 26 Jan 2024 04:11:20 +0000 (22:11 -0600)
committer Borislav Petkov (AMD) <bp@alien8.de>
Mon, 29 Jan 2024 19:34:19 +0000 (20:34 +0100)
Add a kdump-safe version of sev_firmware_shutdown() and register it as a
crash_kexec_post_notifier so that it is invoked during panic/crash to do
SEV/SNP shutdown. This is required for transitioning all IOMMU pages to
reclaim/hypervisor state; otherwise, re-init of IOMMU pages during
crashdump kernel boot fails and panics the crashdump kernel.

This panic notifier runs in atomic context, so it must not acquire any
locks/mutexes, and it polls for PSP command completion instead of
depending on the PSP command completion interrupt.

  [ mdr: Remove use of "we" in comments. ]

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240126041126.1927228-21-michael.roth@amd.com
arch/x86/include/asm/sev.h
arch/x86/kernel/crash.c
arch/x86/kernel/sev.c
arch/x86/virt/svm/sev.c
drivers/crypto/ccp/sev-dev.c

index 60de1b43a7290a9fabbbb0d6ff243a3e64bf9967..bed95e1f4d5262fd173c30abe1cd3a448c792f74 100644 (file)
@@ -227,6 +227,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
 void snp_accept_memory(phys_addr_t start, phys_addr_t end);
 u64 snp_get_unsupported_features(u64 status);
 u64 sev_get_status(void);
+void kdump_sev_callback(void);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
@@ -255,6 +256,7 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
 static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
 static inline u64 sev_get_status(void) { return 0; }
+static inline void kdump_sev_callback(void) { }
 #endif
 
 #ifdef CONFIG_KVM_AMD_SEV
index b6b044356f1b40599de1d41a1dfc5405ae2ca00c..d184c29398db7d9fdd9a305ee909efcdd6b77c02 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/intel_pt.h>
 #include <asm/crash.h>
 #include <asm/cmdline.h>
+#include <asm/sev.h>
 
 /* Used while preparing memory map entries for second kernel */
 struct crash_memmap_data {
@@ -59,6 +60,8 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
         */
        cpu_emergency_stop_pt();
 
+       kdump_sev_callback();
+
        disable_local_APIC();
 }
 
index 1ec753331524abb6847ac90f8e9bea912f42c7f0..002af6c30601b379aae02c759366bce04ec316be 100644 (file)
@@ -2265,3 +2265,13 @@ static int __init snp_init_platform_device(void)
        return 0;
 }
 device_initcall(snp_init_platform_device);
+
+void kdump_sev_callback(void)
+{
+       /*
+        * Do wbinvd() on remote CPUs when SNP is enabled in order to
+        * safely do SNP_SHUTDOWN on the local CPU.
+        *
+        * This is called from kdump_nmi_callback(), so the wbinvd() below
+        * acts on whichever CPU is running that callback.
+        */
+       if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+               wbinvd();
+}
index 0dffbf3908d03fa25e4e1f74bcc1cc2ef4ddfc6e..cffe1157a90acfcf741b31ac216d6fd3a9ed4fd2 100644 (file)
@@ -216,6 +216,12 @@ skip_enable:
 
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL);
 
+       /*
+        * Setting crash_kexec_post_notifiers to 'true' to ensure that SNP panic
+        * notifier is invoked to do SNP IOMMU shutdown before kdump.
+        */
+       crash_kexec_post_notifiers = true;
+
        return 0;
 
 nosnp:
index 605c6bf88cf7aeabb3621a39d4736f19fd6bb844..504a2216bded6799000485fdabd54a2862d1fd28 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/hw_random.h>
 #include <linux/ccp.h>
 #include <linux/firmware.h>
+#include <linux/panic_notifier.h>
 #include <linux/gfp.h>
 #include <linux/cpufeature.h>
 #include <linux/fs.h>
@@ -143,6 +144,25 @@ static int sev_wait_cmd_ioc(struct sev_device *sev,
 {
        int ret;
 
+       /*
+        * If invoked during panic handling, local interrupts are disabled,
+        * so the PSP command completion interrupt can't be used. Poll for
+        * PSP command completion instead.
+        */
+       if (irqs_disabled()) {
+               unsigned long timeout_usecs = (timeout * USEC_PER_SEC) / 10;
+
+               /* Poll for SEV command completion: */
+               while (timeout_usecs--) {
+                       *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+                       if (*reg & PSP_CMDRESP_RESP)
+                               return 0;
+
+                       udelay(10);
+               }
+               return -ETIMEDOUT;
+       }
+
        ret = wait_event_timeout(sev->int_queue,
                        sev->int_rcvd, timeout * HZ);
        if (!ret)
@@ -1338,17 +1358,6 @@ static int __sev_platform_shutdown_locked(int *error)
        return ret;
 }
 
-static int sev_platform_shutdown(int *error)
-{
-       int rc;
-
-       mutex_lock(&sev_cmd_mutex);
-       rc = __sev_platform_shutdown_locked(NULL);
-       mutex_unlock(&sev_cmd_mutex);
-
-       return rc;
-}
-
 static int sev_get_platform_state(int *state, int *error)
 {
        struct sev_user_data_status data;
@@ -1624,7 +1633,7 @@ fw_err:
        return ret;
 }
 
-static int __sev_snp_shutdown_locked(int *error)
+static int __sev_snp_shutdown_locked(int *error, bool panic)
 {
        struct sev_device *sev = psp_master->sev_data;
        struct sev_data_snp_shutdown_ex data;
@@ -1637,7 +1646,16 @@ static int __sev_snp_shutdown_locked(int *error)
        data.len = sizeof(data);
        data.iommu_snp_shutdown = 1;
 
-       wbinvd_on_all_cpus();
+       /*
+        * If invoked during panic handling, local interrupts are disabled
+        * and all CPUs are stopped, so wbinvd_on_all_cpus() can't be called.
+        * In that case, a wbinvd() is done on remote CPUs via the NMI
+        * callback, so only a local wbinvd() is needed here.
+        */
+       if (!panic)
+               wbinvd_on_all_cpus();
+       else
+               wbinvd();
 
        ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
        /* SHUTDOWN may require DF_FLUSH */
@@ -1681,17 +1699,6 @@ static int __sev_snp_shutdown_locked(int *error)
        return ret;
 }
 
-static int sev_snp_shutdown(int *error)
-{
-       int rc;
-
-       mutex_lock(&sev_cmd_mutex);
-       rc = __sev_snp_shutdown_locked(error);
-       mutex_unlock(&sev_cmd_mutex);
-
-       return rc;
-}
-
 static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
 {
        struct sev_device *sev = psp_master->sev_data;
@@ -2139,19 +2146,28 @@ e_err:
        return ret;
 }
 
-static void sev_firmware_shutdown(struct sev_device *sev)
+static void __sev_firmware_shutdown(struct sev_device *sev, bool panic)
 {
        int error;
 
-       sev_platform_shutdown(NULL);
+       __sev_platform_shutdown_locked(NULL);
 
        if (sev_es_tmr) {
-               /* The TMR area was encrypted, flush it from the cache */
-               wbinvd_on_all_cpus();
+               /*
+                * The TMR area was encrypted, flush it from the cache.
+                *
+                * If invoked during panic handling, local interrupts are
+                * disabled and all CPUs are stopped, so wbinvd_on_all_cpus()
+                * can't be used. In that case, wbinvd() is done on remote CPUs
+                * via the NMI callback, and done for this CPU later during
+                * SNP shutdown, so wbinvd_on_all_cpus() can be skipped.
+                */
+               if (!panic)
+                       wbinvd_on_all_cpus();
 
                __snp_free_firmware_pages(virt_to_page(sev_es_tmr),
                                          get_order(sev_es_tmr_size),
-                                         false);
+                                         true);
                sev_es_tmr = NULL;
        }
 
@@ -2167,7 +2183,14 @@ static void sev_firmware_shutdown(struct sev_device *sev)
                snp_range_list = NULL;
        }
 
-       sev_snp_shutdown(&error);
+       __sev_snp_shutdown_locked(&error, panic);
+}
+
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+       mutex_lock(&sev_cmd_mutex);
+       __sev_firmware_shutdown(sev, false);    /* false: not the panic notifier path */
+       mutex_unlock(&sev_cmd_mutex);
+}
 
 void sev_dev_destroy(struct psp_device *psp)
@@ -2185,6 +2208,29 @@ void sev_dev_destroy(struct psp_device *psp)
        psp_clear_sev_irq_handler(psp);
 }
 
+static int snp_shutdown_on_panic(struct notifier_block *nb,
+                                unsigned long reason, void *arg)
+{
+       struct sev_device *sev = psp_master->sev_data;
+
+       /*
+        * If sev_cmd_mutex is already acquired, then it's likely
+        * another PSP command is in flight and issuing a shutdown
+        * would fail in unexpected ways. Rather than create even
+        * more confusion during a panic, just bail out here.
+        *
+        * The mutex is only tested, never taken: the shutdown below
+        * goes through __sev_firmware_shutdown() directly rather than
+        * the sev_firmware_shutdown() wrapper that locks sev_cmd_mutex.
+        */
+       if (mutex_is_locked(&sev_cmd_mutex))
+               return NOTIFY_DONE;
+
+       __sev_firmware_shutdown(sev, true);
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block snp_panic_notifier = {
+       .notifier_call = snp_shutdown_on_panic,
+};
+
 int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
                                void *data, int *error)
 {
@@ -2222,6 +2268,8 @@ void sev_pci_init(void)
        dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ?
                "-SNP" : "", sev->api_major, sev->api_minor, sev->build);
 
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &snp_panic_notifier);
        return;
 
 err:
@@ -2236,4 +2284,7 @@ void sev_pci_exit(void)
                return;
 
        sev_firmware_shutdown(sev);
+
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &snp_panic_notifier);
 }