drm/amdkfd: add a new flag to manage where VRAM allocations go
author	Alex Deucher <alexander.deucher@amd.com>
	Thu, 30 Jan 2025 20:12:58 +0000 (15:12 -0500)
committer	Alex Deucher <alexander.deucher@amd.com>
	Thu, 13 Feb 2025 02:04:08 +0000 (21:04 -0500)
On big and small APUs we send KFD VRAM allocations to GTT
since the VRAM carve out is either non-existent or relatively
small.  However, if someone sets the carve out size to be
relatively large, we still end up using GTT even though VRAM
would be the better choice.

No functional change with this patch, but it lets the driver
decide in the future which logic to use based on the carve
out size.
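
For illustration only, the future gating in amdgpu_ttm_init()
might look something like the sketch below; the threshold
constant and the exact condition are assumptions, not part of
this patch:

	/* Hypothetical future logic: only prefer GTT when the
	 * APU carve out is too small to be useful for KFD.
	 * APU_SMALL_CARVEOUT_THRESHOLD is a made-up name.
	 */
	if (adev->flags & AMD_IS_APU &&
	    adev->gmc.real_vram_size < APU_SMALL_CARVEOUT_THRESHOLD)
		adev->apu_prefer_gtt = true;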

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f86daad4c77eb7ede782d848e809b6328651dd01..0dbea25ab888543bf7087bab23c3294980cfa3c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1193,6 +1193,11 @@ struct amdgpu_device {
        struct mutex                    enforce_isolation_mutex;
 
        struct amdgpu_init_level *init_lvl;
+
+       /* This flag determines whether KFD VRAM allocations on APUs
+        * are placed in VRAM or GTT.
+        */
+       bool                            apu_prefer_gtt;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 2c1b38c5cfc625cbd1d9630cad6bf8fd52b264b0..0312231b703e10f1426d84cb4272fce6c7f4370b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -459,7 +459,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
                else
                        mem_info->local_mem_size_private =
                                        KFD_XCP_MEMORY_SIZE(adev, xcp->id);
-       } else if (adev->flags & AMD_IS_APU) {
+       } else if (adev->apu_prefer_gtt) {
                mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
                mem_info->local_mem_size_private = 0;
        } else {
@@ -818,7 +818,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
                }
                do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
                return ALIGN_DOWN(tmp, PAGE_SIZE);
-       } else if (adev->flags & AMD_IS_APU) {
+       } else if (adev->apu_prefer_gtt) {
                return (ttm_tt_pages_limit() << PAGE_SHIFT);
        } else {
                return adev->gmc.real_vram_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2b4d3840e5bca2dddde470adecf6fcccadd51d61..60062c10b083fb79498225f59d634989cde8fc22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -197,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                        return -EINVAL;
 
                vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
-               if (adev->flags & AMD_IS_APU) {
+               if (adev->apu_prefer_gtt) {
                        system_mem_needed = size;
                        ttm_mem_needed = size;
                }
@@ -234,7 +234,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
        if (adev && xcp_id >= 0) {
                adev->kfd.vram_used[xcp_id] += vram_needed;
                adev->kfd.vram_used_aligned[xcp_id] +=
-                               (adev->flags & AMD_IS_APU) ?
+                               adev->apu_prefer_gtt ?
                                vram_needed :
                                ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
        }
@@ -262,7 +262,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
 
                if (adev) {
                        adev->kfd.vram_used[xcp_id] -= size;
-                       if (adev->flags & AMD_IS_APU) {
+                       if (adev->apu_prefer_gtt) {
                                adev->kfd.vram_used_aligned[xcp_id] -= size;
                                kfd_mem_limit.system_mem_used -= size;
                                kfd_mem_limit.ttm_mem_used -= size;
@@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
         * if peer device has large BAR. In contrast, access over xGMI is
         * allowed for both small and large BAR configurations of peer device
         */
-       if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
+       if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
            ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1667,7 +1667,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
                - reserved_for_pt
                - reserved_for_ras;
 
-       if (adev->flags & AMD_IS_APU) {
+       if (adev->apu_prefer_gtt) {
                system_mem_available = no_system_mem_limit ?
                                        kfd_mem_limit.max_system_mem_limit :
                                        kfd_mem_limit.max_system_mem_limit -
@@ -1715,7 +1715,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 
-               if (adev->flags & AMD_IS_APU) {
+               if (adev->apu_prefer_gtt) {
                        domain = AMDGPU_GEM_DOMAIN_GTT;
                        alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                        alloc_flags = 0;
@@ -1966,7 +1966,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
        if (size) {
                if (!is_imported &&
                   (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
-                  ((adev->flags & AMD_IS_APU) &&
+                  (adev->apu_prefer_gtt &&
                    mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
                        *size = bo_size;
                else
@@ -2393,7 +2393,7 @@ static int import_obj_create(struct amdgpu_device *adev,
        (*mem)->bo = bo;
        (*mem)->va = va;
        (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
-                        !(adev->flags & AMD_IS_APU) ?
+                        !adev->apu_prefer_gtt ?
                         AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
 
        (*mem)->mapped_to_gpu_memory = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3f61c999fde3028393eda21c61e6dd579960694f..e68324ee4b2591a04dbdb373b315402814a2dac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1987,6 +1987,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
                 (unsigned int)(gtt_size / (1024 * 1024)));
 
+       if (adev->flags & AMD_IS_APU)
+               adev->apu_prefer_gtt = true;
+
        /* Initialize doorbell pool on PCI BAR */
        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
        if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d05d199b5e44828c9a1caac184a54d6495192609..79251f22b70220550a80937a8c4ad92645a5b35d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1027,7 +1027,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
        if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
                return -EINVAL;
 
-       if (adev->flags & AMD_IS_APU)
+       if (adev->apu_prefer_gtt)
                return 0;
 
        pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index e32e19196f6b2ba94ed38df7bc3d2582c3dd6f22..db3034b00dac24c3b2d79faa555bc9ca1504933a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2692,7 +2692,7 @@ svm_range_best_restore_location(struct svm_range *prange,
                return -1;
        }
 
-       if (node->adev->flags & AMD_IS_APU)
+       if (node->adev->apu_prefer_gtt)
                return 0;
 
        if (prange->preferred_loc == gpuid ||
@@ -3441,7 +3441,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
                goto out;
        }
 
-       if (bo_node->adev->flags & AMD_IS_APU) {
+       if (bo_node->adev->apu_prefer_gtt) {
                best_loc = 0;
                goto out;
        }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index bddd24f04669e87a4d34e639c5d0482b2391afc8..6ea23c78009ce1b0a3a2c9b6647f56e991cc5049 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -202,7 +202,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
  * is initialized to not 0 when page migration register device memory.
  */
 #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
-                                       ((adev)->flags & AMD_IS_APU))
+                                       ((adev)->apu_prefer_gtt))
 
 void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);