drm/amdgpu: Update BO memory accounting to rely on allocation flag
authorRamesh Errabolu <Ramesh.Errabolu@amd.com>
Tue, 9 Nov 2021 05:37:27 +0000 (23:37 -0600)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 22 Nov 2021 19:45:01 +0000 (14:45 -0500)
Accounting system to track amount of available memory (system, TTM
and VRAM of a device) relies on BO's domain. The change is to rely
instead on allocation flag indicating BO type - VRAM, GTT, USERPTR,
MMIO or DOORBELL

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

index d00de575c5414828b5f3e7ee66d7573c3fa2c4bf..fcbc8a9c9e06d7161a24d3ff03012792976a0fb8 100644 (file)
@@ -301,6 +301,12 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
+
+/**
+ * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
index 94fccf0b47ad3ac68004be8dd1d205065e582b05..cfc84af682b1e40463fd7264c6cd195b75c3f9f4 100644 (file)
@@ -120,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
                PAGE_ALIGN(size);
 }
 
+/**
+ * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer including any reserved for control structures
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by BO. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ *
+ * Return: returns -ENOMEM in case of error, zero otherwise
+ */
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 domain, bool sg)
+               uint64_t size, u32 alloc_flag)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
@@ -131,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
        acc_size = amdgpu_amdkfd_acc_size(size);
 
        vram_needed = 0;
-       if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-               /* TTM GTT memory */
+       if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
                system_mem_needed = acc_size + size;
                ttm_mem_needed = acc_size + size;
-       } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
-               /* Userptr */
+       } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+               system_mem_needed = acc_size;
+               ttm_mem_needed = acc_size;
+               vram_needed = size;
+       } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                system_mem_needed = acc_size + size;
                ttm_mem_needed = acc_size;
-       } else {
-               /* VRAM and SG */
+       } else if (alloc_flag &
+                  (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+                   KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
                system_mem_needed = acc_size;
                ttm_mem_needed = acc_size;
-               if (domain == AMDGPU_GEM_DOMAIN_VRAM)
-                       vram_needed = size;
+       } else {
+               pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+               return -ENOMEM;
        }
 
        spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -160,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
            (adev->kfd.vram_used + vram_needed >
             adev->gmc.real_vram_size - reserved_for_pt)) {
                ret = -ENOMEM;
-       } else {
-               kfd_mem_limit.system_mem_used += system_mem_needed;
-               kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
-               adev->kfd.vram_used += vram_needed;
+               goto release;
        }
 
+       /* Update memory accounting by decreasing available system
+        * memory, TTM memory and GPU memory as computed above
+        */
+       adev->kfd.vram_used += vram_needed;
+       kfd_mem_limit.system_mem_used += system_mem_needed;
+       kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
        return ret;
 }
 
 static void unreserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 domain, bool sg)
+               uint64_t size, u32 alloc_flag)
 {
        size_t acc_size;
 
        acc_size = amdgpu_amdkfd_acc_size(size);
 
        spin_lock(&kfd_mem_limit.mem_limit_lock);
-       if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+
+       if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
                kfd_mem_limit.system_mem_used -= (acc_size + size);
                kfd_mem_limit.ttm_mem_used -= (acc_size + size);
-       } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+       } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+               kfd_mem_limit.system_mem_used -= acc_size;
+               kfd_mem_limit.ttm_mem_used -= acc_size;
+               adev->kfd.vram_used -= size;
+       } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= (acc_size + size);
                kfd_mem_limit.ttm_mem_used -= acc_size;
-       } else {
+       } else if (alloc_flag &
+                  (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+                   KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
                kfd_mem_limit.system_mem_used -= acc_size;
                kfd_mem_limit.ttm_mem_used -= acc_size;
-               if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
-                       adev->kfd.vram_used -= size;
-                       WARN_ONCE(adev->kfd.vram_used < 0,
-                                 "kfd VRAM memory accounting unbalanced");
-               }
+       } else {
+               pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+               goto release;
        }
-       WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-                 "kfd system memory accounting unbalanced");
+
+       WARN_ONCE(adev->kfd.vram_used < 0,
+                 "KFD VRAM memory accounting unbalanced");
        WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-                 "kfd TTM memory accounting unbalanced");
+                 "KFD TTM memory accounting unbalanced");
+       WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+                 "KFD system memory accounting unbalanced");
 
+release:
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       u32 domain = bo->preferred_domains;
-       bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
+       u32 alloc_flags = bo->kfd_bo->alloc_flags;
+       u64 size = amdgpu_bo_size(bo);
 
-       if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
-               domain = AMDGPU_GEM_DOMAIN_CPU;
-               sg = false;
-       }
-
-       unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+       unreserve_mem_limit(adev, size, alloc_flags);
 
        kfree(bo->kfd_bo);
 }
 
-
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
@@ -1452,7 +1475,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        amdgpu_sync_create(&(*mem)->sync);
 
-       ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
+       ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
        if (ret) {
                pr_debug("Insufficient memory\n");
                goto err_reserve_limit;
@@ -1508,7 +1531,7 @@ err_node_allow:
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       unreserve_mem_limit(adev, size, alloc_domain, !!sg);
+       unreserve_mem_limit(adev, size, flags);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        kfree(*mem);