node->vm_info.vmid_num_kfd = vmid_num_kfd;
node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
/* TODO: Check if error handling is needed */
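+ /* A node bound to a partition (xcp) inherits the partition's XCC
+  * instance mask; a node without one exposes every XCC on the device.
+  */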
- if (node->xcp)
+ if (node->xcp) {
amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
&node->xcc_mask);
- else
+ ++xcp_idx;
+ } else {
node->xcc_mask =
(1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
-
- node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
- node->start_xcc_id = node->num_xcc_per_node * i;
+ }
if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
partition_mode == AMDGPU_CPX_PARTITION_MODE &&
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- int xcc = 0;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id;
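+ /* Iterate only over the XCC instances whose bits are set in the
+  * mask, rather than counting up from the removed start_xcc_id.
+  */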
- for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
+ for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_sh_mem_settings(
- dqm->dev->adev, qpd->vmid,
- qpd->sh_mem_config,
- qpd->sh_mem_ape1_base,
- qpd->sh_mem_ape1_limit,
- qpd->sh_mem_bases,
- dqm->dev->start_xcc_id + xcc);
+ dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
+ qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
+ qpd->sh_mem_bases, xcc_id);
}
static void kfd_hws_hang(struct device_queue_manager *dqm)
static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- int xcc = 0;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id;
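+ /* Not every kfd2kgd interface implements this hook, hence the
+  * NULL check.
+  */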
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
- for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
+ for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_trap_handler_settings(
- dqm->dev->adev, qpd->vmid,
- qpd->tba_addr, qpd->tma_addr,
- dqm->dev->start_xcc_id + xcc);
+ dqm->dev->adev, qpd->vmid, qpd->tba_addr,
+ qpd->tma_addr, xcc_id);
}
static int allocate_vmid(struct device_queue_manager *dqm,
struct kfd_process_device *pdd;
int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
- int xcc = 0;
+ uint32_t xcc_mask = dev->xcc_mask;
+ int xcc_id;
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
reg_sq_cmd.bits.vm_id = vmid;
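+ /* Broadcast the SQ kill command to every XCC instance so waves
+  * belonging to this vmid are stopped on all of them.
+  */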
- for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
- dev->kfd2kgd->wave_control_execute(dev->adev,
- reg_gfx_index.u32All,
- reg_sq_cmd.u32All,
- dev->start_xcc_id + xcc);
+ for_each_inst(xcc_id, xcc_mask)
+ dev->kfd2kgd->wave_control_execute(
+ dev->adev, reg_gfx_index.u32All,
+ reg_sq_cmd.u32All, xcc_id);
return 0;
}
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
- int xcc = 0, ret;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ int xcc_id, ret;
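+ /* The pasid-to-vmid mapping is programmed identically on every
+  * XCC; stop at the first instance that fails.
+  */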
- for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
+ for_each_inst(xcc_id, xcc_mask) {
ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
- dqm->dev->adev, pasid, vmid,
- dqm->dev->start_xcc_id + xcc);
+ dqm->dev->adev, pasid, vmid, xcc_id);
if (ret)
break;
}
static void init_interrupts(struct device_queue_manager *dqm)
{
- unsigned int i, xcc;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
+ unsigned int i, xcc_id;
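+ /* Every enabled pipe must have its interrupts initialized on each
+  * XCC instance present in the mask.
+  */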
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
if (is_pipe_enabled(dqm, 0, i)) {
- for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
+ for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->init_interrupts(
- dqm->dev->adev, i,
- dqm->dev->start_xcc_id +
- xcc);
+ dqm->dev->adev, i, xcc_id);
}
}
}
get_num_all_sdma_engines(dqm) *
dev->kfd->device_info.num_sdma_queues_per_engine +
(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
- dqm->dev->num_xcc_per_node);
+ NUM_XCC(dqm->dev->xcc_mask));
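+ /* The HIQ part of the allocation holds one MQD copy per XCC. */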
retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
struct device_queue_manager *dqm = data;
+ uint32_t xcc_mask = dqm->dev->xcc_mask;
uint32_t (*dump)[2], n_regs;
int pipe, queue;
- int r = 0, xcc;
- uint32_t inst;
+ int r = 0, xcc_id;
uint32_t sdma_engine_start;
if (!dqm->sched_running) {
return 0;
}
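+ /* xcc_id is the real hardware instance number taken from the mask,
+  * no longer start_xcc_id plus a logical offset.
+  */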
- for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
- inst = dqm->dev->start_xcc_id + xcc;
+ for_each_inst(xcc_id, xcc_mask) {
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
- &dump, &n_regs, inst);
+ &dump, &n_regs, xcc_id);
if (!r) {
seq_printf(m,
" Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
- inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
- KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
+ xcc_id,
+ KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
+ KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
KFD_CIK_HIQ_QUEUE);
seq_reg_dump(m, dump, n_regs);
dqm->dev->kfd->shared_resources.cp_queue_bitmap))
continue;
- r = dqm->dev->kfd2kgd->hqd_dump(
- dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, pipe, queue,
+ &dump, &n_regs, xcc_id);
if (r)
break;
- seq_printf(m, " Inst %d, CP Pipe %d, Queue %d\n",
- inst, pipe, queue);
+ seq_printf(m,
+ " Inst %d, CP Pipe %d, Queue %d\n",
+ xcc_id, pipe, queue);
seq_reg_dump(m, dump, n_regs);
kfree(dump);
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
- dev->num_xcc_per_node;
+ NUM_XCC(dev->xcc_mask);
mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
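+ /* One page-aligned control stack plus one MQD is allocated for
+  * each XCC in the partition.
+  */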
retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
- node->num_xcc_per_node,
+ NUM_XCC(node->xcc_mask),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
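+ /* Build one HIQ MQD image for each XCC instance. */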
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
- int xcc, err;
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int xcc_id, err, inst = 0;
void *xcc_mqd;
- uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
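+ /* inst indexes the MQD copies packed contiguously in memory;
+  * xcc_id names the hardware instance each copy is loaded onto.
+  */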
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
- xcc_mqd = mqd + hiq_mqd_size * xcc;
+ for_each_inst(xcc_id, xcc_mask) {
+ xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
- p->doorbell_off,
- start_inst+xcc);
+ p->doorbell_off, xcc_id);
if (err) {
- pr_debug("Failed to load HIQ MQD for XCC: %d\n", xcc);
+ pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
break;
}
+ ++inst;
}
return err;
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
- int xcc = 0, err;
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int xcc_id, err, inst = 0;
void *xcc_mqd;
- uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
- xcc_mqd = mqd + hiq_mqd_size * xcc;
+ for_each_inst(xcc_id, xcc_mask) {
+ xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
- queue_id, start_inst+xcc);
+ queue_id, xcc_id);
if (err) {
- pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
+ pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
+ ++inst;
}
return err;
uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
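+ /* current_logical_xcc_start rotates per queue so the starting
+  * logical XCC is spread round-robin across the instances.
+  */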
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
m->compute_tg_chunk_size = 1;
m->compute_current_logic_xcc_id =
(local_xcc_start + xcc) %
- mm->dev->num_xcc_per_node;
+ NUM_XCC(mm->dev->xcc_mask);
switch (xcc) {
case 0:
int xcc = 0;
uint64_t size = mm->mqd_stride(mm, q);
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
- int xcc = 0, err;
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int xcc_id, err, inst = 0;
void *xcc_mqd;
struct v9_mqd *m;
uint64_t mqd_offset;
- uint32_t start_inst = mm->dev->start_xcc_id;
m = get_mqd(mqd);
mqd_offset = m->cp_mqd_stride_size;
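+ /* The stride between per-XCC copies comes from the MQD's own
+  * cp_mqd_stride_size field.
+  */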
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
- xcc_mqd = mqd + mqd_offset * xcc;
+ for_each_inst(xcc_id, xcc_mask) {
+ xcc_mqd = mqd + mqd_offset * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
- queue_id, start_inst+xcc);
+ queue_id, xcc_id);
if (err) {
- pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
+ pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
+ ++inst;
}
return err;
{
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
- int xcc = 0, err;
+ uint32_t xcc_mask = mm->dev->xcc_mask;
+ int xcc_id, err, inst = 0;
void *xcc_mqd;
- uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
- xcc_mqd = mqd + mqd_stride_size * xcc;
- err = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, xcc_mqd,
- pipe_id, queue_id,
- (uint32_t __user *)p->write_ptr,
- wptr_shift, 0, mms, start_inst+xcc);
+ for_each_inst(xcc_id, xcc_mask) {
+ xcc_mqd = mqd + mqd_stride_size * inst;
+ err = mm->dev->kfd2kgd->hqd_load(
+ mm->dev->adev, xcc_mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
+ xcc_id);
if (err) {
- pr_debug("Load MQD failed for xcc: %d\n", xcc);
+ pr_debug("Load MQD failed for xcc: %d\n", inst);
break;
}
+ ++inst;
}
return err;
uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
- for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+ for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
xcc_mqd = mqd + mqd_stride_size * xcc;
xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
q->ctx_save_restore_area_size * xcc);
*/
struct kfd_vmid_info vm_info;
unsigned int id; /* topology stub index */
- unsigned int num_xcc_per_node;
- unsigned int start_xcc_id; /* Starting XCC instance
- * number for the node
- */
uint32_t xcc_mask; /* Instance mask of XCCs present */
struct amdgpu_xcp *xcp;
struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
struct kfd_node *dev = pdd->dev;
+ uint32_t xcc_mask = dev->xcc_mask;
int xcc = 0;
/*
amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
pdd->qpd.vmid);
} else {
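+ /* Flush by pasid, once per XCC instance present in the mask. */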
- for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
- amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,
- pdd->process->pasid, type,
- dev->start_xcc_id + xcc);
+ for_each_inst(xcc, xcc_mask)
+ amdgpu_amdkfd_flush_gpu_tlb_pasid(
+ dev->adev, pdd->process->pasid, type, xcc);
}
}
seq_printf(m, " Compute queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_CP;
- num_xccs = q->device->num_xcc_per_node;
+ num_xccs = NUM_XCC(q->device->xcc_mask);
break;
default:
seq_printf(m,
dev->node_props.cpu_cores_count);
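+ /* simd_count and array_count below report totals across all XCC
+  * instances of the node.
+  */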
sysfs_show_32bit_prop(buffer, offs, "simd_count",
dev->gpu ? (dev->node_props.simd_count *
- dev->gpu->num_xcc_per_node) : 0);
+ NUM_XCC(dev->gpu->xcc_mask)) : 0);
sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
dev->node_props.mem_banks_count);
sysfs_show_32bit_prop(buffer, offs, "caches_count",
dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, offs, "array_count",
dev->gpu ? (dev->node_props.array_count *
- dev->gpu->num_xcc_per_node) : 0);
+ NUM_XCC(dev->gpu->xcc_mask)) : 0);
sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
dev->node_props.simd_arrays_per_engine);
sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
sysfs_show_64bit_prop(buffer, offs, "unique_id",
dev->gpu->adev->unique_id);
sysfs_show_32bit_prop(buffer, offs, "num_xcc",
- dev->gpu->num_xcc_per_node);
+ NUM_XCC(dev->gpu->xcc_mask));
}
return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
buf[4] = gpu->adev->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
buf[6] = upper_32_bits(local_mem_size);
- buf[7] = gpu->start_xcc_id | (gpu->num_xcc_per_node << 16);
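+ /* Pack the lowest XCC instance number (ffs() is 1-based) together
+  * with the instance count so each partition yields a distinct hash.
+  */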
+ buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
for (i = 0, hashout = 0; i < 8; i++)
hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);