iommu/amd: Fix Invalid wait context issue
authorVasant Hegde <vasant.hegde@amd.com>
Thu, 30 May 2024 08:48:01 +0000 (08:48 +0000)
committerJoerg Roedel <jroedel@suse.de>
Tue, 4 Jun 2024 12:00:59 +0000 (14:00 +0200)
With commit c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF")
we are hitting below issue. This happens because in IOPF enablement path
it holds spin lock with irq disable and then tries to take mutex lock.

dmesg:
-----
[    0.938739] =============================
[    0.938740] [ BUG: Invalid wait context ]
[    0.938742] 6.10.0-rc1+ #1 Not tainted
[    0.938745] -----------------------------
[    0.938746] swapper/0/1 is trying to lock:
[    0.938748] ffffffff8c9f01d8 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x78/0x4a0
[    0.938767] other info that might help us debug this:
[    0.938768] context-{5:5}
[    0.938769] 7 locks held by swapper/0/1:
[    0.938772]  #0: ffff888101a91310 (&group->mutex){+.+.}-{4:4}, at: bus_iommu_probe+0x70/0x160
[    0.938790]  #1: ffff888101d1f1b8 (&domain->lock){....}-{3:3}, at: amd_iommu_attach_device+0xa5/0x700
[    0.938799]  #2: ffff888101cc3d18 (&dev_data->lock){....}-{3:3}, at: amd_iommu_attach_device+0xc5/0x700
[    0.938806]  #3: ffff888100052830 (&iommu->lock){....}-{2:2}, at: amd_iommu_iopf_add_device+0x3f/0xa0
[    0.938813]  #4: ffffffff8945a340 (console_lock){+.+.}-{0:0}, at: _printk+0x48/0x50
[    0.938822]  #5: ffffffff8945a390 (console_srcu){....}-{0:0}, at: console_flush_all+0x58/0x4e0
[    0.938867]  #6: ffffffff82459f80 (console_owner){....}-{0:0}, at: console_flush_all+0x1f0/0x4e0
[    0.938872] stack backtrace:
[    0.938874] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.10.0-rc1+ #1
[    0.938877] Hardware name: HP HP EliteBook 745 G3/807E, BIOS N73 Ver. 01.39 04/16/2019

Fix above issue by re-arranging code in attach device path:
  - move device PASID/IOPF enablement outside lock in AMD IOMMU driver.
    This is safe as core layer holds group->mutex lock before calling
    iommu_ops->attach_dev.

Reported-by: Borislav Petkov <bp@alien8.de>
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Reported-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF")
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20240530084801.10758-1-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/amd/iommu.c
drivers/iommu/amd/ppr.c

index 52d83730a22ad4aa3a5feb48cd1be21f503e6cac..c2703599bb16684aa7f47f90f0dbc695498e01a9 100644 (file)
@@ -2032,7 +2032,6 @@ static int do_attach(struct iommu_dev_data *dev_data,
                     struct protection_domain *domain)
 {
        struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
-       struct pci_dev *pdev;
        int ret = 0;
 
        /* Update data structures */
@@ -2047,30 +2046,13 @@ static int do_attach(struct iommu_dev_data *dev_data,
        domain->dev_iommu[iommu->index] += 1;
        domain->dev_cnt                 += 1;
 
-       pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+       /* Setup GCR3 table */
        if (pdom_is_sva_capable(domain)) {
                ret = init_gcr3_table(dev_data, domain);
                if (ret)
                        return ret;
-
-               if (pdev) {
-                       pdev_enable_caps(pdev);
-
-                       /*
-                        * Device can continue to function even if IOPF
-                        * enablement failed. Hence in error path just
-                        * disable device PRI support.
-                        */
-                       if (amd_iommu_iopf_add_device(iommu, dev_data))
-                               pdev_disable_cap_pri(pdev);
-               }
-       } else if (pdev) {
-               pdev_enable_cap_ats(pdev);
        }
 
-       /* Update device table */
-       amd_iommu_dev_update_dte(dev_data, true);
-
        return ret;
 }
 
@@ -2163,6 +2145,11 @@ static void detach_device(struct device *dev)
 
        do_detach(dev_data);
 
+out:
+       spin_unlock(&dev_data->lock);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
+
        /* Remove IOPF handler */
        if (ppr)
                amd_iommu_iopf_remove_device(iommu, dev_data);
@@ -2170,10 +2157,6 @@ static void detach_device(struct device *dev)
        if (dev_is_pci(dev))
                pdev_disable_caps(to_pci_dev(dev));
 
-out:
-       spin_unlock(&dev_data->lock);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static struct iommu_device *amd_iommu_probe_device(struct device *dev)
@@ -2485,6 +2468,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
        struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
        struct protection_domain *domain = to_pdomain(dom);
        struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
+       struct pci_dev *pdev;
        int ret;
 
        /*
@@ -2517,7 +2501,23 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
        }
 #endif
 
-       iommu_completion_wait(iommu);
+       pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+       if (pdev && pdom_is_sva_capable(domain)) {
+               pdev_enable_caps(pdev);
+
+               /*
+                * Device can continue to function even if IOPF
+                * enablement failed. Hence in error path just
+                * disable device PRI support.
+                */
+               if (amd_iommu_iopf_add_device(iommu, dev_data))
+                       pdev_disable_cap_pri(pdev);
+       } else if (pdev) {
+               pdev_enable_cap_ats(pdev);
+       }
+
+       /* Update device table */
+       amd_iommu_dev_update_dte(dev_data, true);
 
        return ret;
 }
index 31d98a2a11a6e954140f71c4085fcd3df0cb536c..7c67d69f0b8cad4aeecec6343dbc46b680b55c46 100644 (file)
@@ -248,40 +248,26 @@ void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt,
 int amd_iommu_iopf_add_device(struct amd_iommu *iommu,
                              struct iommu_dev_data *dev_data)
 {
-       unsigned long flags;
        int ret = 0;
 
        if (!dev_data->pri_enabled)
                return ret;
 
-       raw_spin_lock_irqsave(&iommu->lock, flags);
-
-       if (!iommu->iopf_queue) {
-               ret = -EINVAL;
-               goto out_unlock;
-       }
+       if (!iommu->iopf_queue)
+               return -EINVAL;
 
        ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        dev_data->ppr = true;
-
-out_unlock:
-       raw_spin_unlock_irqrestore(&iommu->lock, flags);
-       return ret;
+       return 0;
 }
 
 /* Its assumed that caller has verified that device was added to iopf queue */
 void amd_iommu_iopf_remove_device(struct amd_iommu *iommu,
                                  struct iommu_dev_data *dev_data)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&iommu->lock, flags);
-
        iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev);
        dev_data->ppr = false;
-
-       raw_spin_unlock_irqrestore(&iommu->lock, flags);
 }