drm/amdkfd: Store queue cwsr area size to node properties
author Philip Yang <Philip.Yang@amd.com>
Wed, 26 Jun 2024 18:52:28 +0000 (14:52 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 Jul 2024 18:45:58 +0000 (14:45 -0400)
Use the Thunk's queue eop buffer size, cwsr area size and ctl stack
size calculations, and store the values in the KFD node properties.

These values will be used to validate the queue eop buffer size, cwsr
area size and ctl stack size when creating a KFD user compute queue.
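As a rough illustration, the queue creation path could compare the
user-supplied sizes against the stored properties along the lines
below. This is a minimal sketch, not the follow-up patch itself: it
reuses the existing queue_properties fields from kfd_priv.h, and
whether each check is an exact match or a lower bound is an assumption
here.

static int kfd_queue_size_check(const struct kfd_node_properties *props,
                                const struct queue_properties *q)
{
        /* EOP ring buffer must match the per-ASIC requirement */
        if (props->eop_buffer_size &&
            q->eop_ring_buffer_size != props->eop_buffer_size)
                return -EINVAL;

        /* CWSR area must cover ctl stack plus workgroup context data */
        if (props->cwsr_size &&
            q->ctx_save_restore_area_size < props->cwsr_size)
                return -EINVAL;

        /* Control stack size must match the computed value */
        if (props->ctl_stack_size &&
            q->ctl_stack_size != props->ctl_stack_size)
                return -EINVAL;

        return 0;
}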

They will also be exposed to user space via the sysfs KFD node
properties, so that the duplicate calculation code can be removed from
the Thunk.
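For example, once exposed, a user-space consumer such as the Thunk
could read the values from the topology node instead of recomputing
them. A sketch, assuming the sysfs property names match the new
struct kfd_node_properties fields (the sysfs plumbing itself is not
part of this patch):

#include <stdio.h>
#include <string.h>

/* Scan a KFD topology node's properties file for one "name value" pair */
static int kfd_read_node_property(int node, const char *name,
                                  unsigned long long *value)
{
        char path[128], key[64];
        unsigned long long val;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/virtual/kfd/kfd/topology/nodes/%d/properties",
                 node);
        f = fopen(path, "r");
        if (!f)
                return -1;

        while (fscanf(f, "%63s %llu", key, &val) == 2) {
                if (!strcmp(key, name)) {
                        *value = val;
                        fclose(f);
                        return 0;
                }
        }

        fclose(f);
        return -1;
}

/* e.g. kfd_read_node_property(1, "cwsr_size", &cwsr_size) */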

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_queue.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.h

index c31589043d5bd21cc430f36f2220264f508442bb..b5cae48dff669939efbe4b4931766d2986c9d703 100644 (file)
@@ -1295,6 +1295,7 @@ int kfd_queue_buffer_get(struct amdgpu_vm *vm, void __user *addr, struct amdgpu_
 void kfd_queue_buffer_put(struct amdgpu_vm *vm, struct amdgpu_bo **bo);
 int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
 int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_properties *properties);
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev);
 
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
                struct kfd_node *dev);
index 67242ce051b5c0fd97a21a370fbd68ff54f4b8ad..adcda9730c9feaa94884383fc6168ff05c7b64af 100644 (file)
@@ -24,6 +24,7 @@
 
 #include <linux/slab.h>
 #include "kfd_priv.h"
+#include "kfd_topology.h"
 #include "kfd_svm.h"
 
 void print_queue_properties(struct queue_properties *q)
@@ -305,3 +306,77 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope
                                 properties->ctx_save_restore_area_size);
        return 0;
 }
+
+#define SGPR_SIZE_PER_CU       0x4000
+#define LDS_SIZE_PER_CU                0x10000
+#define HWREG_SIZE_PER_CU      0x1000
+#define DEBUGGER_BYTES_ALIGN   64
+#define DEBUGGER_BYTES_PER_WAVE        32
+
+static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
+{
+       u32 vgpr_size = 0x40000;
+
+       if ((gfxv / 100 * 100) == 90400 ||      /* GFX_VERSION_AQUA_VANJARAM */
+           gfxv == 90010 ||                    /* GFX_VERSION_ALDEBARAN */
+           gfxv == 90008)                      /* GFX_VERSION_ARCTURUS */
+               vgpr_size = 0x80000;
+       else if (gfxv == 110000 ||              /* GFX_VERSION_PLUM_BONITO */
+                gfxv == 110001 ||              /* GFX_VERSION_WHEAT_NAS */
+                gfxv == 120000 ||              /* GFX_VERSION_GFX1200 */
+                gfxv == 120001)                /* GFX_VERSION_GFX1201 */
+               vgpr_size = 0x60000;
+
+       return vgpr_size;
+}
+
+#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv)      \
+       (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
+        LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU)
+
+#define CNTL_STACK_BYTES_PER_WAVE(gfxv)        \
+       ((gfxv) >= 100100 ? 12 : 8)     /* GFX_VERSION_NAVI10 */
+
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
+void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
+{
+       struct kfd_node_properties *props = &dev->node_props;
+       u32 gfxv = props->gfx_target_version;
+       u32 ctl_stack_size;
+       u32 wg_data_size;
+       u32 wave_num;
+       u32 cu_num;
+
+       if (gfxv < 80001)       /* GFX_VERSION_CARRIZO */
+               return;
+
+       cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
+       wave_num = (gfxv < 100100) ?    /* GFX_VERSION_NAVI10 */
+                   min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
+                   : cu_num * 32;
+
+       wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE);
+       ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
+       ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+                              PAGE_SIZE);
+
+       if ((gfxv / 10000 * 10000) == 100000) {
+               /* HW design limits control stack size to 0x7000.
+                * This is insufficient for theoretical PM4 cases
+                * but sufficient for AQL, limited by SPI events.
+                */
+               ctl_stack_size = min(ctl_stack_size, 0x7000);
+       }
+
+       props->ctl_stack_size = ctl_stack_size;
+       props->debug_memory_size = ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+       props->cwsr_size = ctl_stack_size + wg_data_size;
+
+       if (gfxv == 80002)      /* GFX_VERSION_TONGA */
+               props->eop_buffer_size = 0x8000;
+       else if ((gfxv / 100 * 100) == 90400)   /* GFX_VERSION_AQUA_VANJARAM */
+               props->eop_buffer_size = 4096;
+       else if (gfxv >= 80000)
+               props->eop_buffer_size = 4096;
+}
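To make the arithmetic above concrete, here is a stand-alone sketch
that mirrors the formulas for a hypothetical Aldebaran-class node
(gfxv 90010) with 104 CUs. The CU count is illustrative, and it
assumes 4 KiB pages and that the cu_num term wins the wave_num min().

#include <stdio.h>

#define PAGE_SIZE       0x1000u
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int cu_num = 104;              /* hypothetical CU count */
        unsigned int vgpr_size = 0x80000;       /* kfd_get_vgpr_size_per_cu(90010) */
        /* VGPR + SGPR + LDS + HWREG, per WG_CONTEXT_DATA_SIZE_PER_CU() */
        unsigned int wg_per_cu = vgpr_size + 0x4000 + 0x10000 + 0x1000;
        unsigned int wave_num = cu_num * 40;    /* pre-gfx10: 40 waves per CU */
        unsigned int wg_data_size = ALIGN(cu_num * wg_per_cu, PAGE_SIZE);
        /* 40-byte header + 8 bytes per wave + 8, page aligned */
        unsigned int ctl_stack_size = ALIGN(40 + wave_num * 8 + 8, PAGE_SIZE);

        printf("ctl_stack_size: 0x%x\n", ctl_stack_size);      /* 0x9000 */
        printf("cwsr_size:      0x%x\n", ctl_stack_size + wg_data_size);
                                        /* 0x3c91000, ~60.6 MiB per queue */
        return 0;
}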
index 6f89b06f89d3803a4028e869e99fb0f72a3b9beb..a9b3eda65a2ccbf16fcfc4f1c6ab302d77e69d50 100644 (file)
@@ -2120,6 +2120,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
                dev->gpu->adev->gmc.xgmi.connected_to_cpu)
                dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
 
+       kfd_queue_ctx_save_restore_size(dev);
+
        kfd_debug_print_topology();
 
        kfd_notify_gpu_change(gpu_id, 1);
index 2d1c9d771bef2df57cbac276b151a7bb5fb766a4..43ba0d32e5bd7145d41855eb07ed3ff5c6a23b66 100644 (file)
@@ -74,6 +74,10 @@ struct kfd_node_properties {
        uint32_t num_sdma_xgmi_engines;
        uint32_t num_sdma_queues_per_engine;
        uint32_t num_cp_queues;
+       uint32_t cwsr_size;
+       uint32_t ctl_stack_size;
+       uint32_t eop_buffer_size;
+       uint32_t debug_memory_size;
        char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };