drm/xe: Add xe_gt_tlb_invalidation_range and convert PT layer to use this
author    Matthew Brost <matthew.brost@intel.com>
          Thu, 25 Apr 2024 04:55:12 +0000 (21:55 -0700)
committer Matthew Brost <matthew.brost@intel.com>
          Fri, 26 Apr 2024 19:10:07 +0000 (12:10 -0700)
xe_gt_tlb_invalidation_range accepts a start and end address rather than
a VMA. This will enable multiple VMAs to be invalidated in a single
invalidation. Update the PT layer to use this new function.
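
As a reference for callers, a minimal sketch of the seqno + wait usage
pattern described in the kernel-doc below (the helper is hypothetical
and not part of this patch):

  /* Hypothetical helper: invalidate [start, end) for @asid and wait.
   * With a NULL fence, completion is tracked via the returned seqno.
   */
  static int invalidate_range_sync(struct xe_gt *gt, u64 start, u64 end,
                                   u32 asid)
  {
          int seqno;

          seqno = xe_gt_tlb_invalidation_range(gt, NULL, start, end, asid);
          if (seqno <= 0)
                  return seqno;

          return xe_gt_tlb_invalidation_wait(gt, seqno);
  }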

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Oak Zeng <oak.zeng@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240425045513.1913039-13-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
drivers/gpu/drm/xe/xe_pt.c

diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index 8e9c4b990fbbf083946e649a7a0efdeb1961451b..d0ee1e0df0bd3d9976af63b37c974cd05d2b1193 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -263,11 +263,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
 }
 
 /**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
+ * address range
+ *
  * @gt: graphics tile
  * @fence: invalidation fence which will be signaled on TLB invalidation
  * completion, can be NULL
- * @vma: VMA to invalidate
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
  *
  * Issue a range-based TLB invalidation if supported; if not, fall back to a
  * full TLB invalidation. Completion is asynchronous and the caller can use
@@ -277,17 +281,15 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
-                              struct xe_gt_tlb_invalidation_fence *fence,
-                              struct xe_vma *vma)
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+                                struct xe_gt_tlb_invalidation_fence *fence,
+                                u64 start, u64 end, u32 asid)
 {
        struct xe_device *xe = gt_to_xe(gt);
 #define MAX_TLB_INVALIDATION_LEN       7
        u32 action[MAX_TLB_INVALIDATION_LEN];
        int len = 0;
 
-       xe_gt_assert(gt, vma);
-
        /* Execlists not supported */
        if (gt_to_xe(gt)->info.force_execlist) {
                if (fence)
@@ -301,8 +303,8 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
        if (!xe->info.has_range_tlb_invalidation) {
                action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
        } else {
-               u64 start = xe_vma_start(vma);
-               u64 length = xe_vma_size(vma);
+               u64 orig_start = start;
+               u64 length = end - start;
-               u64 align, end;
+               u64 align;
 
                if (length < SZ_4K)
@@ -315,12 +317,12 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
                 * address mask covering the required range.
                 */
                align = roundup_pow_of_two(length);
-               start = ALIGN_DOWN(xe_vma_start(vma), align);
-               end = ALIGN(xe_vma_end(vma), align);
+               start = ALIGN_DOWN(start, align);
+               end = ALIGN(end, align);
                length = align;
                while (start + length < end) {
                        length <<= 1;
-                       start = ALIGN_DOWN(xe_vma_start(vma), length);
+                       start = ALIGN_DOWN(orig_start, length);
                }
 
                /*
@@ -329,16 +331,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
                 */
                if (length >= SZ_2M) {
                        length = max_t(u64, SZ_16M, length);
-                       start = ALIGN_DOWN(xe_vma_start(vma), length);
+                       start = ALIGN_DOWN(orig_start, length);
                }
 
                xe_gt_assert(gt, length >= SZ_4K);
                xe_gt_assert(gt, is_power_of_2(length));
-               xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1)));
+               xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+                                                   ilog2(SZ_2M) + 1)));
                xe_gt_assert(gt, IS_ALIGNED(start, length));
 
                action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
-               action[len++] = xe_vma_vm(vma)->usm.asid;
+               action[len++] = asid;
                action[len++] = lower_32_bits(start);
                action[len++] = upper_32_bits(start);
                action[len++] = ilog2(length) - ilog2(SZ_4K);
@@ -349,6 +352,32 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
        return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
 }
 
+/**
+ * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
+ * @gt: graphics tile
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion, can be NULL
+ * @vma: VMA to invalidate
+ *
+ * Issue a range-based TLB invalidation if supported; if not, fall back to a
+ * full TLB invalidation. Completion is asynchronous and the caller can use
+ * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
+ * completion.
+ *
+ * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
+ * negative error code on error.
+ */
+int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
+                              struct xe_gt_tlb_invalidation_fence *fence,
+                              struct xe_vma *vma)
+{
+       xe_gt_assert(gt, vma);
+
+       return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
+                                           xe_vma_end(vma),
+                                           xe_vma_vm(vma)->usm.asid);
+}
+
 /**
  * xe_gt_tlb_invalidation_wait - Wait for TLB to complete
  * @gt: graphics tile
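
To make the range-narrowing logic in xe_gt_tlb_invalidation_range()
concrete, below is a small standalone C model of the same computation,
with local stand-ins for the kernel's ALIGN/ALIGN_DOWN/
roundup_pow_of_two helpers; the input range is an arbitrary example and
the >= SZ_2M widening step is omitted:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ULL
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

static uint64_t roundup_pow_of_two(uint64_t v)
{
        uint64_t r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        /* Arbitrary example: 12KiB range whose start is not size-aligned */
        uint64_t orig_start = 0x13000, start = 0x13000, end = 0x16000;
        uint64_t length = end - start;
        uint64_t align;

        if (length < SZ_4K)
                length = SZ_4K;

        /* Grow to an aligned power-of-two block covering [start, end) */
        align = roundup_pow_of_two(length);
        start = ALIGN_DOWN(start, align);
        end = ALIGN_UP(end, align);
        length = align;
        while (start + length < end) {
                length <<= 1;
                start = ALIGN_DOWN(orig_start, length);
        }

        /* Prints start=0x10000 length=0x8000: one aligned power-of-two
         * block covering the whole requested range.
         */
        printf("start=%#llx length=%#llx\n",
               (unsigned long long)start, (unsigned long long)length);
        return 0;
}

The resulting start and ilog2(length) - ilog2(SZ_4K) pair is what the
function packs into the XE_GUC_TLB_INVAL_PAGE_SELECTIVE action.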
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index fbb743d80d2cefdafe440ad70b776f0b44d2bee5..bf3bebd9f985ba6b04fe73d67611e295de665c65 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -20,6 +20,9 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
                               struct xe_gt_tlb_invalidation_fence *fence,
                               struct xe_vma *vma);
+int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
+                                struct xe_gt_tlb_invalidation_fence *fence,
+                                u64 start, u64 end, u32 asid);
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
 
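
With the declaration exported above, the commit's stated goal of
invalidating multiple VMAs in a single invalidation could look roughly
like the following hypothetical sketch (the helper, the vmas array, and
the asid plumbing are illustrative; it assumes all VMAs belong to the
same address space):

  /* Hypothetical: cover several VMAs of one address space with a single
   * range invalidation instead of one xe_gt_tlb_invalidation_vma() call
   * per VMA. Assumes num_vmas > 0 and all VMAs share the same asid.
   */
  static int invalidate_vmas_once(struct xe_gt *gt,
                                  struct xe_gt_tlb_invalidation_fence *fence,
                                  struct xe_vma **vmas, int num_vmas, u32 asid)
  {
          u64 start = U64_MAX, end = 0;
          int i;

          for (i = 0; i < num_vmas; ++i) {
                  start = min(start, xe_vma_start(vmas[i]));
                  end = max(end, xe_vma_end(vmas[i]));
          }

          return xe_gt_tlb_invalidation_range(gt, fence, start, end, asid);
  }

Since the hardware interface ultimately takes a single power-of-two
block anyway, over-invalidating the gaps between neighbouring VMAs is
typically cheaper than issuing one invalidation per VMA.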
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 5b7930f46cf36f148df9990a19f320ee104ea4f6..8d3765d3351eb3b8683711124963f62c484ddcb0 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1075,10 +1075,12 @@ static const struct xe_migrate_pt_update_ops userptr_bind_ops = {
 struct invalidation_fence {
        struct xe_gt_tlb_invalidation_fence base;
        struct xe_gt *gt;
-       struct xe_vma *vma;
        struct dma_fence *fence;
        struct dma_fence_cb cb;
        struct work_struct work;
+       u64 start;
+       u64 end;
+       u32 asid;
 };
 
 static const char *
@@ -1121,13 +1123,14 @@ static void invalidation_fence_work_func(struct work_struct *w)
                container_of(w, struct invalidation_fence, work);
 
        trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
-       xe_gt_tlb_invalidation_vma(ifence->gt, &ifence->base, ifence->vma);
+       xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
+                                    ifence->end, ifence->asid);
 }
 
 static int invalidation_fence_init(struct xe_gt *gt,
                                   struct invalidation_fence *ifence,
                                   struct dma_fence *fence,
-                                  struct xe_vma *vma)
+                                  u64 start, u64 end, u32 asid)
 {
        int ret;
 
@@ -1144,7 +1147,9 @@ static int invalidation_fence_init(struct xe_gt *gt,
        dma_fence_get(&ifence->base.base);      /* Ref for caller */
        ifence->fence = fence;
        ifence->gt = gt;
-       ifence->vma = vma;
+       ifence->start = start;
+       ifence->end = end;
+       ifence->asid = asid;
 
        INIT_WORK(&ifence->work, invalidation_fence_work_func);
        ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1295,8 +1300,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 
                /* TLB invalidation must be done before signaling rebind */
                if (ifence) {
-                       int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
-                                                         vma);
+                       int err = invalidation_fence_init(tile->primary_gt,
+                                                         ifence, fence,
+                                                         xe_vma_start(vma),
+                                                         xe_vma_end(vma),
+                                                         xe_vma_vm(vma)->usm.asid);
                        if (err) {
                                dma_fence_put(fence);
                                kfree(ifence);
@@ -1641,7 +1649,10 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
                        dma_fence_wait(fence, false);
 
                /* TLB invalidation must be done before signaling unbind */
-               err = invalidation_fence_init(tile->primary_gt, ifence, fence, vma);
+               err = invalidation_fence_init(tile->primary_gt, ifence, fence,
+                                             xe_vma_start(vma),
+                                             xe_vma_end(vma),
+                                             xe_vma_vm(vma)->usm.asid);
                if (err) {
                        dma_fence_put(fence);
                        kfree(ifence);
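
A note on the xe_pt.c change: because invalidation_fence_init() now
snapshots plain start/end/asid values instead of holding a struct
xe_vma pointer, the deferred worker no longer touches the VMA by the
time the invalidation is actually issued. Restating the work function
from the diff in condensed form to highlight that lifetime point
(tracing, refcounting, and error paths elided):

  static void invalidation_fence_work_func(struct work_struct *w)
  {
          struct invalidation_fence *ifence =
                  container_of(w, struct invalidation_fence, work);

          /* Only the snapshotted values are read here; no VMA deref */
          xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base,
                                       ifence->start, ifence->end,
                                       ifence->asid);
  }

This decoupling is what allows a future caller to aggregate several
VMAs into one snapshotted range, as the commit message anticipates.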