Initial support for GC 9.5.0.
v2: squash in pqm_clean_queue_resource() fix from Lijo
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
*pcache_info);
return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) ||
KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
}
case IP_VERSION(4, 4, 0):/* ALDEBARAN */
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
case IP_VERSION(5, 0, 0):/* NAVI10 */
case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
case IP_VERSION(5, 0, 2):/* NAVI14 */
break;
case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
case IP_VERSION(9, 4, 4): /* GC 9.4.4 */
+ case IP_VERSION(9, 5, 0): /* GC 9.5.0 */
kfd->device_info.event_interrupt_class =
&event_interrupt_class_v9_4_3;
break;
gfx_target_version = 90402;
f2g = &gc_9_4_3_kfd2kgd;
break;
+ case IP_VERSION(9, 5, 0):
+ gfx_target_version = 90500;
+ f2g = &gc_9_4_3_kfd2kgd;
+ break;
/* Navi10 */
case IP_VERSION(10, 1, 10):
gfx_target_version = 100100;
> KFD_CWSR_TMA_OFFSET);
kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> KFD_CWSR_TMA_OFFSET);
&& kfd->mec2_fw_version >= 0x28) ||
(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) ||
+ (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) ||
(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
&& kfd->mec2_fw_version >= 0x6b) ||
last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
- /* For GFX9.4.3, we need special handling for VMIDs depending on
- * partition mode.
+ /* For multi-partition capable GPUs, we need special handling for VMIDs
+ * depending on partition mode.
* In CPX mode, the VMID range needs to be shared between XCDs.
* Additionally, there are 13 VMIDs (3-15) available for KFD. To
* divide them equally, we change starting VMID to 4 and not use
* VMID 3.
- * If the VMID range changes for GFX9.4.3, then this code MUST be
- * revisited.
+ * If the VMID range changes for multi-partition capable GPUs, then
+ * this code MUST be revisited.
*/
if (kfd->adev->xcp_mgr) {
partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
/*
- * For GFX9.4.3, the KFD abstracts all partitions within a socket as
- * xGMI connected in the topology so assign a unique hive id per
- * device based on the pci device location if device is in PCIe mode.
+ * For multi-partition capable GPUs, the KFD abstracts all partitions
+ * within a socket as xGMI connected in the topology so assign a unique
+ * hive id per device based on the pci device location if device is in
+ * PCIe mode.
*/
- if (!kfd->hive_id &&
- (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- kfd->num_nodes > 1)
+ if (!kfd->hive_id && kfd->num_nodes > 1)
kfd->hive_id = pci_dev_id(kfd->adev->pdev);
kfd->noretry = kfd->adev->gmc.noretry;
KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
}
- if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) &&
- partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+ if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
kfd->num_nodes != 1) {
- /* For GFX9.4.3 and CPX mode, first XCD gets VMID range
- * 4-9 and second XCD gets VMID range 10-15.
+ /* For multi-partition capable GPUs and CPX mode, first
+ * XCD gets VMID range 4-9 and second XCD gets VMID
+ * range 10-15.
*/
node->vm_info.first_vmid_kfd = (i%2 == 0) ?
amdgpu_amdkfd_get_local_mem_info(kfd->adev,
&node->local_mem_info, node->xcp);
- if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4))
+ if (kfd->adev->xcp_mgr)
kfd_setup_interrupt_bitmap(node, i);
/* Initialize the KFD node */
qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0))
qpd->sh_mem_config |=
(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
m->compute_static_thread_mgmt_se2 = se_mask[2];
m->compute_static_thread_mgmt_se3 = se_mask[3];
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) {
m->compute_static_thread_mgmt_se4 = se_mask[4];
m->compute_static_thread_mgmt_se5 = se_mask[5];
m->compute_static_thread_mgmt_se6 = se_mask[6];
m->cp_hqd_ctx_save_control = 0;
if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0))
update_cu_mask(mm, mqd, minfo, 0);
set_priority(m, q);
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_v9_4_3;
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
+ mqd->check_preemption_failed = check_preemption_failed;
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) {
mqd->init_mqd = init_mqd_hiq_v9_4_3;
mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
default:
if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
pm->pmf = &kfd_aldebaran_pm_funcs;
else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
pm->pmf = &kfd_v9_pm_funcs;
#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \
- (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)))
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)))
struct kfd_node;
uint32_t i;
if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0))
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
irq_drain_fence[3] = pdd->process->pasid;
/*
- * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+ * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3]
*/
if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) {
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) {
node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
irq_drain_fence[3] |= node_id << 16;
}
if (!gws && pdd->qpd.num_gws == 0)
return -EINVAL;
- if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
- KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
!dev->kfd->shared_resources.enable_mes) {
if (gws)
ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
if (pqn->q->gws) {
if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
+ KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
!dev->kfd->shared_resources.enable_mes)
amdgpu_amdkfd_remove_gws_from_process(
pqm->process->kgd_process_info, pqn->q->gws);
unsigned int max_queues = 127; /* HWS limit */
/*
- * On GFX 9.4.3, increase the number of queues that
- * can be created to 255. No HWS limit on GFX 9.4.3.
+ * On GFX 9.4.3/9.5.0, increase the number of queues that
+ * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0.
*/
if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
max_queues = 255;
q = NULL;
pcache->cacheline_size = pcache_info[cache_type].cache_line_size;
if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) ||
- KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4))
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) ||
+ KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0))
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
else
mode = UNKNOWN_MEMORY_PARTITION_MODE;