iommu/vt-d: Introduce batched cache invalidation
author     Tina Zhang <tina.zhang@intel.com>
Mon, 2 Sep 2024 02:27:24 +0000 (10:27 +0800)
committer  Joerg Roedel <jroedel@suse.de>
Mon, 2 Sep 2024 16:15:03 +0000 (18:15 +0200)
Convert IOTLB and Dev-IOTLB invalidation to a batched model. Cache tag
invalidation requests for a domain are now accumulated in a qi_batch
structure and flushed in bulk, replacing the previous per-request
qi_flush approach with a more efficient batching mechanism.
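
For reference, the buffer these helpers fill is the qi_batch structure added
to drivers/iommu/intel/iommu.h earlier in this series. A minimal sketch,
assuming the field names and descriptor count from that header patch:

    /* Per-domain buffer of pending queued-invalidation descriptors. */
    #define QI_MAX_BATCHED_DESC_COUNT	16	/* assumed value; see the header patch */

    struct qi_batch {
    	struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];	/* pending invalidation descriptors */
    	unsigned int index;					/* number of valid entries in descs[] */
    };

Each qi_batch_add_*() helper in the diff below writes one descriptor at
descs[index] and bumps the index; when the buffer fills up, or the flush loop
moves on to a different IOMMU (a batch can only target one invalidation
queue), the whole batch is submitted with a single qi_submit_sync() call and
the buffer is reset.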

Co-developed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Link: https://lore.kernel.org/r/20240815065221.50328-5-tina.zhang@intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/intel/cache.c

index 2e997d782bebe36461fc9cc033ccc3d3043d7288..e5b89f728ad3b28685234dd2c1e2de4c46404ce9 100644
@@ -262,6 +262,79 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
        return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
 }
 
+static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+       if (!iommu || !batch->index)
+               return;
+
+       qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+       /* Reset the index value and clean the whole batch buffer. */
+       memset(batch, 0, sizeof(*batch));
+}
+
+static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+       if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+               qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+                              unsigned int size_order, u64 type,
+                              struct qi_batch *batch)
+{
+       qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
+       qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+                                  u16 qdep, u64 addr, unsigned int mask,
+                                  struct qi_batch *batch)
+{
+       /*
+        * According to VT-d spec, software is recommended to not submit any Device-TLB
+        * invalidation requests while address remapping hardware is disabled.
+        */
+       if (!(iommu->gcmd & DMA_GCMD_TE))
+               return;
+
+       qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
+       qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
+                               u64 addr, unsigned long npages, bool ih,
+                               struct qi_batch *batch)
+{
+       /*
+        * npages == -1 means a PASID-selective invalidation, otherwise,
+        * a positive value for Page-selective-within-PASID invalidation.
+        * 0 is not a valid input.
+        */
+       if (!npages)
+               return;
+
+       qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
+       qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+                                        u32 pasid,  u16 qdep, u64 addr,
+                                        unsigned int size_order, struct qi_batch *batch)
+{
+       /*
+        * According to VT-d spec, software is recommended to not submit any
+        * Device-TLB invalidation requests while address remapping hardware
+        * is disabled.
+        */
+       if (!(iommu->gcmd & DMA_GCMD_TE))
+               return;
+
+       qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
+                               &batch->descs[batch->index]);
+       qi_batch_increment_index(iommu, batch);
+}
+
 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
                                  unsigned long addr, unsigned long pages,
                                  unsigned long mask, int ih)
@@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
        u64 type = DMA_TLB_PSI_FLUSH;
 
        if (domain->use_first_level) {
-               qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih);
+               qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
+                                   pages, ih, domain->qi_batch);
                return;
        }
 
@@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
        }
 
        if (ecap_qis(iommu->ecap))
-               qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+               qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
+                                  domain->qi_batch);
        else
                __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
 }
@@ -303,19 +378,20 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_
        sid = PCI_DEVID(info->bus, info->devfn);
 
        if (tag->pasid == IOMMU_NO_PASID) {
-               qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
-                                  addr, mask);
+               qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+                                      addr, mask, domain->qi_batch);
                if (info->dtlb_extra_inval)
-                       qi_flush_dev_iotlb(iommu, sid, info->pfsid,
-                                          info->ats_qdep, addr, mask);
+                       qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+                                              addr, mask, domain->qi_batch);
                return;
        }
 
-       qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-                                info->ats_qdep, addr, mask);
+       qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+                                    info->ats_qdep, addr, mask, domain->qi_batch);
        if (info->dtlb_extra_inval)
-               qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-                                        info->ats_qdep, addr, mask);
+               qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+                                            info->ats_qdep, addr, mask,
+                                            domain->qi_batch);
 }
 
 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
@@ -327,11 +403,11 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
        info = dev_iommu_priv_get(tag->dev);
        sid = PCI_DEVID(info->bus, info->devfn);
 
-       qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-                          MAX_AGAW_PFN_WIDTH);
+       qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+                              MAX_AGAW_PFN_WIDTH, domain->qi_batch);
        if (info->dtlb_extra_inval)
-               qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-                                  MAX_AGAW_PFN_WIDTH);
+               qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+                                      MAX_AGAW_PFN_WIDTH, domain->qi_batch);
 }
 
 /*
@@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
                           unsigned long end, int ih)
 {
+       struct intel_iommu *iommu = NULL;
        unsigned long pages, mask, addr;
        struct cache_tag *tag;
        unsigned long flags;
@@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
+               if (iommu && iommu != tag->iommu)
+                       qi_batch_flush_descs(iommu, domain->qi_batch);
+               iommu = tag->iommu;
+
                switch (tag->type) {
                case CACHE_TAG_IOTLB:
                case CACHE_TAG_NESTING_IOTLB:
@@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
                trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
        }
+       qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -381,11 +463,16 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
  */
 void cache_tag_flush_all(struct dmar_domain *domain)
 {
+       struct intel_iommu *iommu = NULL;
        struct cache_tag *tag;
        unsigned long flags;
 
        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
+               if (iommu && iommu != tag->iommu)
+                       qi_batch_flush_descs(iommu, domain->qi_batch);
+               iommu = tag->iommu;
+
                switch (tag->type) {
                case CACHE_TAG_IOTLB:
                case CACHE_TAG_NESTING_IOTLB:
@@ -399,6 +486,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 
                trace_cache_tag_flush_all(tag);
        }
+       qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -416,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
                              unsigned long end)
 {
+       struct intel_iommu *iommu = NULL;
        unsigned long pages, mask, addr;
        struct cache_tag *tag;
        unsigned long flags;
@@ -424,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
        spin_lock_irqsave(&domain->cache_lock, flags);
        list_for_each_entry(tag, &domain->cache_tags, node) {
-               struct intel_iommu *iommu = tag->iommu;
+               if (iommu && iommu != tag->iommu)
+                       qi_batch_flush_descs(iommu, domain->qi_batch);
+               iommu = tag->iommu;
 
                if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
                        iommu_flush_write_buffer(iommu);
@@ -437,5 +528,6 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
                trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
        }
+       qi_batch_flush_descs(iommu, domain->qi_batch);
        spin_unlock_irqrestore(&domain->cache_lock, flags);
 }