drm/amdkfd: Move TLB flushing logic into amdgpu
author Felix Kuehling <Felix.Kuehling@amd.com>
Fri, 24 Feb 2023 23:22:32 +0000 (18:22 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 17 Nov 2023 14:29:53 +0000 (09:29 -0500)
This will make it possible for amdgpu GEM ioctls to flush TLBs on compute
VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. This still works because PASID-based flushing scans the
VMID-PASID mapping registers to find the right VMID. It's only slightly
less efficient. VMID-based flushing was only used with the no-HWS
scheduling policy (KFD_SCHED_POLICY_NO_HWS), which is not a production
use case.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index b8412202a1b0e1339f37756a17fac6cd0733b453..6ab17330a6ed2493859ddc782da76844e4cdf625 100644 (file)
@@ -710,35 +710,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
        return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                                    uint16_t vmid)
-{
-       if (adev->family == AMDGPU_FAMILY_AI) {
-               int i;
-
-               for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-                       amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-       } else {
-               amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
-       }
-
-       return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                                     uint16_t pasid,
-                                     enum TLB_FLUSH_TYPE flush_type,
-                                     uint32_t inst)
-{
-       bool all_hub = false;
-
-       if (adev->family == AMDGPU_FAMILY_AI ||
-           adev->family == AMDGPU_FAMILY_RV)
-               all_hub = true;
-
-       return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 {
        return adev->have_atomics_support;
index dac983da961d6a059ecbd252c09d92cf6e76ed69..16794c2eea35d1ab9ff7ebf9d00e3d3dbc218a5c 100644 (file)
@@ -162,11 +162,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
                                uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-                               uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-                               uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
-                               uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
index d1b8afd105c9f6a8e56288f7b3c023171f7b2f75..d59154ddaaed34c90ff5cb06e2ab791fd4ae14d3 100644 (file)
@@ -1437,6 +1437,50 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
        return 0;
 }
 
+/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of XCC instances whose TLBs are flushed
+ *
+ * Flush TLB for a compute VM if its tlb_seq changed since the last flush.
+ *
+ * Returns: 0 for success, or the error from the first failing flush.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+                               struct amdgpu_vm *vm,
+                               uint32_t flush_type,
+                               uint32_t xcc_mask)
+{
+       uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+       bool all_hub = false;
+       int xcc = 0, r = 0;
+
+       WARN_ON_ONCE(!vm->is_compute_context);
+
+       /*
+        * It can be that we race and lose here, but that is extremely unlikely
+        * and the worst thing which could happen is that we flush the changes
+        * into the TLB once more which is harmless.
+        */
+       if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+               return 0;
+
+       if (adev->family == AMDGPU_FAMILY_AI ||
+           adev->family == AMDGPU_FAMILY_RV)
+               all_hub = true;
+
+       for_each_inst(xcc, xcc_mask) {
+               r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+                                                  all_hub, xcc);
+               if (r)
+                       break;
+       }
+       return r;
+}
+
+
 /**
  * amdgpu_vm_bo_add - add a bo to a specific vm
  *
index 2cd86d2bf73f7af67fd78be273cd57fc68521e11..b6cd565562ad8d9a99270757fc2b37352600d2f3 100644 (file)
@@ -324,6 +324,7 @@ struct amdgpu_vm {
        /* Last finished delayed update */
        atomic64_t              tlb_seq;
        struct dma_fence        *last_tlb_flush;
+       atomic64_t              kfd_last_flushed_seq;
 
        /* How many times we had to re-generate the page tables */
        uint64_t                generation;
@@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                           struct amdgpu_vm *vm,
                           struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+                               struct amdgpu_vm *vm,
+                               uint32_t flush_type,
+                               uint32_t xcc_mask);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
                            struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
index 9cc32f577e38ac87f6f678dcb936dd33577dbb91..a40f8cfc6aa57c807c6ac9adc0ee4cbfd05f033c 100644 (file)
@@ -748,7 +748,6 @@ struct kfd_process_device {
        /* VM context for GPUVM allocations */
        struct file *drm_file;
        void *drm_priv;
-       atomic64_t tlb_seq;
 
        /* GPUVM allocations storage */
        struct idr alloc_idr;
@@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+                                enum TLB_FLUSH_TYPE type)
+{
+       struct amdgpu_device *adev = pdd->dev->adev;
+       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+       amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
index 7a33e06f5c90093c775421927d45b2afce726957..c10d050e1a6123a92f3bbf5d717bdc1d1c36d690 100644 (file)
@@ -1667,7 +1667,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                return ret;
        }
        pdd->drm_priv = drm_file->private_data;
-       atomic64_set(&pdd->tlb_seq, 0);
 
        ret = kfd_process_device_reserve_ib_mem(pdd);
        if (ret)
@@ -2059,36 +2058,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
                               KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-       uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-       struct kfd_node *dev = pdd->dev;
-       uint32_t xcc_mask = dev->xcc_mask;
-       int xcc = 0;
-
-       /*
-        * It can be that we race and lose here, but that is extremely unlikely
-        * and the worst thing which could happen is that we flush the changes
-        * into the TLB once more which is harmless.
-        */
-       if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-               return;
-
-       if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-               /* Nothing to flush until a VMID is assigned, which
-                * only happens when the first queue is created.
-                */
-               if (pdd->qpd.vmid)
-                       amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-                                                       pdd->qpd.vmid);
-       } else {
-               for_each_inst(xcc, xcc_mask)
-                       amdgpu_amdkfd_flush_gpu_tlb_pasid(
-                               dev->adev, pdd->process->pasid, type, xcc);
-       }
-}
-
 /* assumes caller holds process lock. */
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {