2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
36 #include "kfd_pm4_opcodes.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 #include "../../radeon/cik_reg.h"
/*
 * Turn address watching off on @dev by forwarding to the kfd2kgd
 * address_watch_disable callback with the device's kgd handle.
 */
43 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
45 dev->kfd2kgd->address_watch_disable(dev->kgd);
/*
 * dbgdev_diq_submit_ib - queue an indirect buffer on the DIQ and wait until
 * the command processor reports that it has been consumed.
 *
 * @dbgdev:        debug device; dbgdev->dev is used for the GTT sub-allocation
 * @pasid:         stored in the INDIRECT_BUFFER_PASID packet
 * @vmid0_address: 64-bit address of the indirect buffer; split into a low
 *                 and a high half for the packet
 * @packet_buff:   not referenced in the lines visible here
 * @size_in_bytes: size of the indirect buffer; a zero size triggers WARN_ON
 *
 * Two PM4 packets are written back to back into space acquired from the
 * kernel queue: an INDIRECT_BUFFER_PASID packet pointing at the IB, and a
 * RELEASE_MEM packet that makes the CP write QUEUESTATE__ACTIVE to a
 * 64-bit sync variable allocated from GTT. After submitting, the function
 * waits on that variable and then frees it.
 *
 * NOTE(review): this view is incomplete (some declarations, the error
 * checks and every return statement are missing), so the return value is
 * presumably the last status - confirm against the full file.
 */
48 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
49 unsigned int pasid, uint64_t vmid0_address,
50 uint32_t *packet_buff, size_t size_in_bytes)
52 struct pm4__release_mem *rm_packet;
53 struct pm4__indirect_buffer_pasid *ib_packet;
54 struct kfd_mem_obj *mem_obj;
55 size_t pq_packets_size_in_bytes;
56 union ULARGE_INTEGER *largep;
57 union ULARGE_INTEGER addr;
58 struct kernel_queue *kq;
60 unsigned int *ib_packet_buff;
63 if (WARN_ON(!size_in_bytes))
/* queue space needed: one IB packet plus one RELEASE_MEM packet */
68 pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
69 sizeof(struct pm4__indirect_buffer_pasid);
72 * We acquire a buffer from DIQ
73 * The receive packet buff will be sitting on the Indirect Buffer
74 * and in the PQ we put the IB packet + sync packet(s).
76 status = kq->ops.acquire_packet_buffer(kq,
77 pq_packets_size_in_bytes / sizeof(uint32_t),
80 pr_err("acquire_packet_buffer failed\n");
84 memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
86 ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
88 ib_packet->header.count = 3;
89 ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
90 ib_packet->header.type = PM4_TYPE_3;
92 largep = (union ULARGE_INTEGER *) &vmid0_address;
94 ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
95 ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
/* bits 23 and 31 are set; the low 20 bits carry the IB length in dwords */
97 ib_packet->control = (1 << 23) | (1 << 31) |
98 ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
100 ib_packet->bitfields5.pasid = pasid;
103 * for now we use release mem for GPU-CPU synchronization
104 * Consider WaitRegMem + WriteData as a better alternative
105 * we get a GART allocations ( gpu/cpu mapping),
106 * for the sync variable, and wait until:
108 * (b) Sync var is written by CP to mem.
/* the RELEASE_MEM packet starts right after the IB packet in the same buffer */
110 rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
111 (sizeof(struct pm4__indirect_buffer_pasid) /
112 sizeof(unsigned int)));
114 status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
118 pr_err("Failed to allocate GART memory\n");
/* hand the already acquired queue space back when the allocation fails */
119 kq->ops.rollback_packet(kq);
123 rm_state = (uint64_t *) mem_obj->cpu_ptr;
/* seed the sync variable; the wait below looks for QUEUESTATE__ACTIVE */
125 *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
127 rm_packet->header.opcode = IT_RELEASE_MEM;
128 rm_packet->header.type = PM4_TYPE_3;
129 rm_packet->header.count = sizeof(struct pm4__release_mem) /
130 sizeof(unsigned int) - 2;
132 rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
133 rm_packet->bitfields2.event_index =
134 event_index___release_mem__end_of_pipe;
136 rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
137 rm_packet->bitfields2.atc = 0;
138 rm_packet->bitfields2.tc_wb_action_ena = 1;
/* destination of the write is the GPU address of the sync variable */
140 addr.quad_part = mem_obj->gpu_addr;
142 rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
143 rm_packet->address_hi = addr.u.high_part;
145 rm_packet->bitfields3.data_sel =
146 data_sel___release_mem__send_64_bit_data;
148 rm_packet->bitfields3.int_sel =
149 int_sel___release_mem__send_data_after_write_confirm;
151 rm_packet->bitfields3.dst_sel =
152 dst_sel___release_mem__memory_controller;
154 rm_packet->data_lo = QUEUESTATE__ACTIVE;
156 kq->ops.submit_packet(kq);
158 /* Wait till CP writes sync code: */
159 status = amdkfd_fence_wait_timeout(
160 (unsigned int *) rm_state,
161 QUEUESTATE__ACTIVE, 1500);
/* the sync variable is released once the wait has returned */
163 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
/*
 * Register hook for the no-DIQ debug device type. The remark kept in the
 * body says nothing has to be done apart from making sure the DIQ is not
 * used.
 *
 * NOTE(review): none of the function's statements are visible in this
 * view, so what it assigns or returns cannot be confirmed from here.
 */
168 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
171 * no action is needed in this case,
172 * just make sure diq will not be used
/*
 * Register hook for the DIQ debug device type: requests a queue of type
 * KFD_QUEUE_TYPE_DIQ from the process queue manager and then looks up the
 * kernel queue that backs it. If the lookup fails the freshly created
 * queue is destroyed again.
 *
 * NOTE(review): the remaining pqm_create_queue arguments, the failure
 * checks, the place where the kernel queue is stored and the return
 * statements are not visible here.
 */
180 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
182 struct queue_properties properties;
184 struct kernel_queue *kq = NULL;
187 properties.type = KFD_QUEUE_TYPE_DIQ;
189 status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
193 pr_err("Failed to create DIQ\n");
197 pr_debug("DIQ Created with queue id: %d\n", qid);
199 kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
202 pr_err("Error getting DIQ\n");
/* do not leave a queue behind that cannot be driven */
203 pqm_destroy_queue(dbgdev->pqm, qid);
/*
 * Unregister hook for the no-DIQ debug device type: switches address
 * watching off on the underlying device.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
212 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
214 /* disable watch address */
215 dbgdev_address_watch_disable_nodiq(dbgdev->dev);
/*
 * Unregister hook for the DIQ debug device type: destroys the DIQ through
 * the process queue manager, identified by the queue id stored in the
 * kernel queue's properties. Address watch is not disabled here yet (see
 * the todo below).
 *
 * NOTE(review): the return statement is not visible; presumably the
 * pqm_destroy_queue status is returned - confirm.
 */
219 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
221 /* todo - disable address watch */
224 status = pqm_destroy_queue(dbgdev->pqm,
225 dbgdev->kq->queue->properties.queue_id);
/*
 * Fill the three address-watch register images for watch point @index.
 *
 * @adw_info: watch request; watch_address[] and watch_mode[] are read for
 *            @index, watch_mask[] only when the pointer is non-NULL
 * @addrHi:   receives the high address half, masked with
 *            ADDRESS_WATCH_REG_ADDHIGH_MASK
 * @addrLo:   receives the low address half, shifted right by
 *            ADDRESS_WATCH_REG_ADDLOW_SHIFT
 * @cntl:     receives mask, mode and vmid; the ATC bit is OR-ed in
 * @index:    watch point to translate
 * @vmid:     value written to the vmid field of @cntl
 *
 * NOTE(review): a few lines between the declaration and the mask handling
 * are not visible in this view.
 */
231 static void dbgdev_address_watch_set_registers(
232 const struct dbg_address_watch_info *adw_info,
233 union TCP_WATCH_ADDR_H_BITS *addrHi,
234 union TCP_WATCH_ADDR_L_BITS *addrLo,
235 union TCP_WATCH_CNTL_BITS *cntl,
236 unsigned int index, unsigned int vmid)
238 union ULARGE_INTEGER addr;
/*
 * Caller-supplied mask (limited to the default mask bits) when one is
 * given; presumably the default mask otherwise - the else line is not
 * visible here.
 */
245 if (adw_info->watch_mask)
246 cntl->bitfields.mask =
247 (uint32_t) (adw_info->watch_mask[index] &
248 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
250 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
/* split the 64-bit watch address into the two register halves */
252 addr.quad_part = (unsigned long long) adw_info->watch_address[index];
254 addrHi->bitfields.addr = addr.u.high_part &
255 ADDRESS_WATCH_REG_ADDHIGH_MASK;
256 addrLo->bitfields.addr =
257 (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
259 cntl->bitfields.mode = adw_info->watch_mode[index];
260 cntl->bitfields.vmid = (uint32_t) vmid;
261 /* for now assume it is an ATC address */
262 cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
264 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
265 pr_debug("\t\t%20s %08x\n", "set reg add high :",
266 addrHi->bitfields.addr);
267 pr_debug("\t\t%20s %08x\n", "set reg add low :",
268 addrLo->bitfields.addr);
/*
 * Program address watch points without going through the DIQ.
 *
 * The process device data is looked up to obtain the VMID, the request is
 * validated (watch point count must be non-zero and at most
 * MAX_WATCH_ADDRESSES; watch_mode and watch_address must be non-NULL) and
 * then, for every watch point, the register images are computed, dumped
 * via pr_debug and handed to the kfd2kgd address_watch_execute callback.
 *
 * NOTE(review): the error returns, the callback arguments and the final
 * return are not visible in this view.
 */
271 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
272 struct dbg_address_watch_info *adw_info)
274 union TCP_WATCH_ADDR_H_BITS addrHi;
275 union TCP_WATCH_ADDR_L_BITS addrLo;
276 union TCP_WATCH_CNTL_BITS cntl;
277 struct kfd_process_device *pdd;
280 /* taking the vmid for that process on the safe way using pdd */
281 pdd = kfd_get_process_device_data(dbgdev->dev,
284 pr_err("Failed to get pdd for wave control no DIQ\n");
292 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
293 (adw_info->num_watch_points == 0)) {
294 pr_err("num_watch_points is invalid\n");
/* both arrays are indexed per watch point further down */
298 if (!adw_info->watch_mode || !adw_info->watch_address) {
299 pr_err("adw_info fields are not valid\n");
303 for (i = 0; i < adw_info->num_watch_points; i++) {
304 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
305 &cntl, i, pdd->qpd.vmid);
307 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
308 pr_debug("\t\t%20s %08x\n", "register index :", i);
309 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
310 pr_debug("\t\t%20s %08x\n", "Address Low is :",
311 addrLo.bitfields.addr);
/* NOTE(review): "Address high is" is logged twice in a row below */
312 pr_debug("\t\t%20s %08x\n", "Address high is :",
313 addrHi.bitfields.addr);
314 pr_debug("\t\t%20s %08x\n", "Address high is :",
315 addrHi.bitfields.addr);
316 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
317 cntl.bitfields.mask);
318 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
319 cntl.bitfields.mode);
320 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
321 cntl.bitfields.vmid);
322 pr_debug("\t\t%20s %08x\n", "Control atc is :",
324 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
326 pdd->dev->kfd2kgd->address_watch_execute(
/*
 * Program address watch points by submitting register writes through the
 * DIQ.
 *
 * An indirect buffer holding four SET_CONFIG_REG packets is allocated from
 * GTT. For every watch point the packets are filled, in this order, with
 * the control register, the high address, the low address and the control
 * register once more (now with the valid bit decided), and the buffer is
 * submitted with dbgdev_diq_submit_ib() using the target process' PASID.
 * The request is validated the same way as in the no-DIQ variant.
 *
 * NOTE(review): the address_watch_get_offset arguments and results, the
 * error returns and the loop boundaries are only partly visible, so it
 * cannot be told from here whether the GTT buffer is freed inside or after
 * the loop.
 */
337 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
338 struct dbg_address_watch_info *adw_info)
340 struct pm4__set_config_reg *packets_vec;
341 union TCP_WATCH_ADDR_H_BITS addrHi;
342 union TCP_WATCH_ADDR_L_BITS addrLo;
343 union TCP_WATCH_CNTL_BITS cntl;
344 struct kfd_mem_obj *mem_obj;
345 unsigned int aw_reg_add_dword;
346 uint32_t *packet_buff_uint;
349 size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
350 /* we do not control the vmid in DIQ mode, just a place holder */
351 unsigned int vmid = 0;
357 if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
358 (adw_info->num_watch_points == 0)) {
359 pr_err("num_watch_points is invalid\n");
363 if (!adw_info->watch_mode || !adw_info->watch_address) {
364 pr_err("adw_info fields are not valid\n");
368 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
371 pr_err("Failed to allocate GART memory\n");
375 packet_buff_uint = mem_obj->cpu_ptr;
377 memset(packet_buff_uint, 0, ib_size);
379 packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
/*
 * Packet 0 is the template: its first ordinal is copied into packets 1-3.
 * Only the two control-register packets (0 and 3) ask for the VMID to be
 * inserted.
 */
381 packets_vec[0].header.count = 1;
382 packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
383 packets_vec[0].header.type = PM4_TYPE_3;
384 packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
385 packets_vec[0].bitfields2.insert_vmid = 1;
386 packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
387 packets_vec[1].bitfields2.insert_vmid = 0;
388 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
389 packets_vec[2].bitfields2.insert_vmid = 0;
390 packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
391 packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
392 packets_vec[3].bitfields2.insert_vmid = 1;
394 for (i = 0; i < adw_info->num_watch_points; i++) {
395 dbgdev_address_watch_set_registers(adw_info,
402 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
403 pr_debug("\t\t%20s %08x\n", "register index :", i);
404 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
405 pr_debug("\t\t%20s %p\n", "Add ptr is :",
406 adw_info->watch_address);
407 pr_debug("\t\t%20s %08llx\n", "Add is :",
408 adw_info->watch_address[i]);
409 pr_debug("\t\t%20s %08x\n", "Address Low is :",
410 addrLo.bitfields.addr);
411 pr_debug("\t\t%20s %08x\n", "Address high is :",
412 addrHi.bitfields.addr);
413 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
414 cntl.bitfields.mask);
415 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
416 cntl.bitfields.mode);
417 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
418 cntl.bitfields.vmid);
419 pr_debug("\t\t%20s %08x\n", "Control atc is :",
421 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
/* packet 0: control register, written before the valid bit is decided */
424 dbgdev->dev->kfd2kgd->address_watch_get_offset(
427 ADDRESS_WATCH_REG_CNTL);
/* register offsets in the packets are relative to AMD_CONFIG_REG_BASE */
429 packets_vec[0].bitfields2.reg_offset =
430 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
432 packets_vec[0].reg_data[0] = cntl.u32All;
/* packet 1: high half of the watch address */
435 dbgdev->dev->kfd2kgd->address_watch_get_offset(
438 ADDRESS_WATCH_REG_ADDR_HI);
440 packets_vec[1].bitfields2.reg_offset =
441 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
442 packets_vec[1].reg_data[0] = addrHi.u32All;
/* packet 2: low half of the watch address */
445 dbgdev->dev->kfd2kgd->address_watch_get_offset(
448 ADDRESS_WATCH_REG_ADDR_LO);
450 packets_vec[2].bitfields2.reg_offset =
451 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
452 packets_vec[2].reg_data[0] = addrLo.u32All;
454 /* enable watch flag if address is not zero*/
455 if (adw_info->watch_address[i] > 0)
456 cntl.bitfields.valid = 1;
458 cntl.bitfields.valid = 0;
/* packet 3: control register again, now carrying the valid bit */
461 dbgdev->dev->kfd2kgd->address_watch_get_offset(
464 ADDRESS_WATCH_REG_CNTL);
466 packets_vec[3].bitfields2.reg_offset =
467 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
468 packets_vec[3].reg_data[0] = cntl.u32All;
470 status = dbgdev_diq_submit_ib(
472 adw_info->process->pasid,
478 pr_err("Failed to submit IB to DIQ\n");
483 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
/*
 * Translate a wave control request into SQ_CMD and GRBM_GFX_INDEX register
 * images.
 *
 * @wac_info:         request; mode, operand, trapId and the Gen2 wave
 *                    message are read
 * @in_reg_sq_cmd:    output, receives the SQ_CMD image
 * @in_reg_gfx_index: output, receives the GRBM_GFX_INDEX image
 *
 * Both images are assembled in zero-initialised locals. The mode selects
 * how waves are addressed (single wave, broadcast to the process, or
 * broadcast within one CU); the operand selects the command (halt, resume,
 * kill, debug or trap). A trap id is only accepted when it is below
 * MAX_TRAPID.
 *
 * NOTE(review): break/default lines, the status handling and the return
 * are not visible, so it cannot be told from here whether the outputs are
 * written unconditionally or only on success.
 */
487 static int dbgdev_wave_control_set_registers(
488 struct dbg_wave_control_info *wac_info,
489 union SQ_CMD_BITS *in_reg_sq_cmd,
490 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
493 union SQ_CMD_BITS reg_sq_cmd;
494 union GRBM_GFX_INDEX_BITS reg_gfx_index;
495 struct HsaDbgWaveMsgAMDGen2 *pMsg;
497 reg_sq_cmd.u32All = 0;
498 reg_gfx_index.u32All = 0;
499 pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
501 switch (wac_info->mode) {
502 /* Send command to single wave */
503 case HSA_DBG_WAVEMODE_SINGLE:
505 * Limit access to the process waves only,
506 * by setting vmid check
508 reg_sq_cmd.bits.check_vmid = 1;
509 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
510 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
511 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
513 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
514 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
515 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
519 /* Send command to all waves with matching VMID */
520 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
522 reg_gfx_index.bits.sh_broadcast_writes = 1;
523 reg_gfx_index.bits.se_broadcast_writes = 1;
524 reg_gfx_index.bits.instance_broadcast_writes = 1;
526 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
530 /* Send command to all CU waves with matching VMID */
531 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
533 reg_sq_cmd.bits.check_vmid = 1;
534 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
536 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
537 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
538 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
/* second stage: map the requested operation onto the SQ command field */
546 switch (wac_info->operand) {
547 case HSA_DBG_WAVEOP_HALT:
548 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
551 case HSA_DBG_WAVEOP_RESUME:
552 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
555 case HSA_DBG_WAVEOP_KILL:
556 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
559 case HSA_DBG_WAVEOP_DEBUG:
560 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
563 case HSA_DBG_WAVEOP_TRAP:
564 if (wac_info->trapId < MAX_TRAPID) {
565 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
566 reg_sq_cmd.bits.trap_id = wac_info->trapId;
/* hand the assembled images back to the caller */
578 *in_reg_sq_cmd = reg_sq_cmd;
579 *in_reg_gfx_index = reg_gfx_index;
/*
 * Execute a wave control request through the DIQ.
 *
 * The register images are derived from @wac_info, the vm_id field of the
 * SQ command is cleared (the packet asks for the VMID to be inserted
 * instead), and an indirect buffer with three register-write packets is
 * built in GTT memory:
 *   packet 0 - SET_UCONFIG_REG: GRBM_GFX_INDEX, selecting the target
 *   packet 1 - SET_CONFIG_REG:  SQ_CMD, with VMID insertion enabled
 *   packet 2 - same first ordinal as packet 0: GRBM_GFX_INDEX set back to
 *              all-broadcast
 * The buffer is submitted with dbgdev_diq_submit_ib() using the target
 * process' PASID and freed afterwards.
 *
 * NOTE(review): the base constants subtracted from the register offsets,
 * the error returns and the final return are not visible in this view.
 */
585 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
586 struct dbg_wave_control_info *wac_info)
590 union SQ_CMD_BITS reg_sq_cmd;
591 union GRBM_GFX_INDEX_BITS reg_gfx_index;
592 struct kfd_mem_obj *mem_obj;
593 uint32_t *packet_buff_uint;
594 struct pm4__set_config_reg *packets_vec;
595 size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
597 reg_sq_cmd.u32All = 0;
/* NOTE(review): "®_sq_cmd" below looks like an encoding-damaged "&reg_sq_cmd" */
599 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
602 pr_err("Failed to set wave control registers\n");
606 /* we do not control the VMID in DIQ, so reset it to a known value */
607 reg_sq_cmd.bits.vm_id = 0;
609 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
611 pr_debug("\t\t mode is: %u\n", wac_info->mode);
612 pr_debug("\t\t operand is: %u\n", wac_info->operand);
613 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
614 pr_debug("\t\t msg value is: %u\n",
615 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
616 pr_debug("\t\t vmid is: N/A\n");
618 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
619 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
620 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
621 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
622 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
623 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
624 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
626 pr_debug("\t\t ibw is : %u\n",
627 reg_gfx_index.bitfields.instance_broadcast_writes);
628 pr_debug("\t\t ii is : %u\n",
629 reg_gfx_index.bitfields.instance_index);
630 pr_debug("\t\t sebw is : %u\n",
631 reg_gfx_index.bitfields.se_broadcast_writes);
632 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
633 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
634 pr_debug("\t\t sbw is : %u\n",
635 reg_gfx_index.bitfields.sh_broadcast_writes);
637 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
639 status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
642 pr_err("Failed to allocate GART memory\n");
646 packet_buff_uint = mem_obj->cpu_ptr;
648 memset(packet_buff_uint, 0, ib_size);
/* packet 0: point GRBM_GFX_INDEX at the requested target */
650 packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
651 packets_vec[0].header.count = 1;
652 packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
653 packets_vec[0].header.type = PM4_TYPE_3;
654 packets_vec[0].bitfields2.reg_offset =
655 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
658 packets_vec[0].bitfields2.insert_vmid = 0;
659 packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
/* packet 1: issue the SQ command, VMID inserted at SQ_CMD_VMID_OFFSET */
661 packets_vec[1].header.count = 1;
662 packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
663 packets_vec[1].header.type = PM4_TYPE_3;
664 packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
667 packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
668 packets_vec[1].bitfields2.insert_vmid = 1;
669 packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
671 /* Restore the GRBM_GFX_INDEX register */
673 reg_gfx_index.u32All = 0;
674 reg_gfx_index.bits.sh_broadcast_writes = 1;
675 reg_gfx_index.bits.instance_broadcast_writes = 1;
676 reg_gfx_index.bits.se_broadcast_writes = 1;
/* packet 2 reuses packet 0's first ordinal and writes the broadcast value */
679 packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
680 packets_vec[2].bitfields2.reg_offset =
681 GRBM_GFX_INDEX / (sizeof(uint32_t)) -
684 packets_vec[2].bitfields2.insert_vmid = 0;
685 packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
687 status = dbgdev_diq_submit_ib(
689 wac_info->process->pasid,
695 pr_err("Failed to submit IB to DIQ\n");
697 kfd_gtt_sa_free(dbgdev->dev, mem_obj);
/*
 * Execute a wave control request without the DIQ.
 *
 * The process device data is looked up to learn the VMID, the register
 * images are derived from @wac_info, the VMID is patched into the SQ
 * command, and the result is passed to the kfd2kgd wave_control_execute
 * callback, whose return value is returned.
 *
 * NOTE(review): the failure checks and their return values, as well as the
 * last callback argument, are not visible in this view.
 */
702 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
703 struct dbg_wave_control_info *wac_info)
706 union SQ_CMD_BITS reg_sq_cmd;
707 union GRBM_GFX_INDEX_BITS reg_gfx_index;
708 struct kfd_process_device *pdd;
710 reg_sq_cmd.u32All = 0;
712 /* taking the VMID for that process on the safe way using PDD */
713 pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
716 pr_err("Failed to get pdd for wave control no DIQ\n");
/* NOTE(review): "®_sq_cmd" below looks like an encoding-damaged "&reg_sq_cmd" */
719 status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd,
722 pr_err("Failed to set wave control registers\n");
726 /* for non DIQ we need to patch the VMID: */
728 reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
730 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
732 pr_debug("\t\t mode is: %u\n", wac_info->mode);
733 pr_debug("\t\t operand is: %u\n", wac_info->operand);
734 pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
735 pr_debug("\t\t msg value is: %u\n",
736 wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
737 pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
739 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
740 pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
741 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
742 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
743 pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
744 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
745 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
747 pr_debug("\t\t ibw is : %u\n",
748 reg_gfx_index.bitfields.instance_broadcast_writes);
749 pr_debug("\t\t ii is : %u\n",
750 reg_gfx_index.bitfields.instance_index);
751 pr_debug("\t\t sebw is : %u\n",
752 reg_gfx_index.bitfields.se_broadcast_writes);
753 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
754 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
755 pr_debug("\t\t sbw is : %u\n",
756 reg_gfx_index.bitfields.sh_broadcast_writes);
758 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
760 return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
761 reg_gfx_index.u32All,
/*
 * dbgdev_wave_reset_wavefronts - kill every wavefront that belongs to
 * process @p on device @dev.
 *
 * A wave control request with mode HSA_DBG_WAVEMODE_BROADCAST_PROCESS and
 * operand HSA_DBG_WAVEOP_KILL is prepared. The VMIDs from
 * dev->vm_info.first_vmid_kfd to dev->vm_info.last_vmid_kfd (inclusive)
 * are then scanned through the kfd2kgd callbacks for a valid mapping whose
 * PASID equals p->pasid. If the scan runs past the last VMID an error is
 * logged. Otherwise the register images are built, the VMID that was found
 * is patched into the SQ command and the kfd2kgd wave_control_execute
 * callback is invoked.
 *
 * NOTE(review): the body is only partly visible here (the loop exit, the
 * checks after the pdd lookup and after the register setup, and all return
 * statements are missing), so the return values cannot be confirmed.
 * NOTE(review): "®_sq_cmd" in the register setup call looks like an
 * encoding-damaged "&reg_sq_cmd".
 */
765 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
769 union SQ_CMD_BITS reg_sq_cmd;
770 union GRBM_GFX_INDEX_BITS reg_gfx_index;
771 struct kfd_process_device *pdd;
772 struct dbg_wave_control_info wac_info;
773 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
774 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
776 reg_sq_cmd.u32All = 0;
779 wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
780 wac_info.operand = HSA_DBG_WAVEOP_KILL;
782 pr_debug("Killing all process wavefronts\n");
784 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
785 * ATC_VMID15_PASID_MAPPING
786 * to check which VMID the current process is mapped to.
789 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
790 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
792 if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
793 (dev->kgd, vmid) == p->pasid) {
794 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
801 if (vmid > last_vmid_to_scan) {
802 pr_err("Didn't find vmid for pasid %d\n", p->pasid);
806 /* taking the VMID for that process on the safe way using PDD */
807 pdd = kfd_get_process_device_data(dev, p);
811 status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd,
816 /* for non DIQ we need to patch the VMID: */
817 reg_sq_cmd.bits.vm_id = vmid;
819 dev->kfd2kgd->wave_control_execute(dev->kgd,
820 reg_gfx_index.u32All,
/*
 * kfd_dbgdev_init - set up a debug device object for the given type.
 *
 * @pdbgdev: object to initialise
 * @pdev:    kfd device; not referenced in the lines visible here
 * @type:    stored in pdbgdev->type and used to choose the handler set
 *
 * DBGDEV_TYPE_NODIQ installs the *_nodiq register, unregister,
 * wave-control and address-watch handlers; DBGDEV_TYPE_DIQ installs the
 * *_diq counterparts.
 *
 * NOTE(review): several lines (other field assignments, the switch
 * statement itself, break/default) are not visible in this view.
 */
826 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
827 enum DBGDEV_TYPE type)
831 pdbgdev->type = type;
835 case DBGDEV_TYPE_NODIQ:
836 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
837 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
838 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
839 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
841 case DBGDEV_TYPE_DIQ:
843 pdbgdev->dbgdev_register = dbgdev_register_diq;
844 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
845 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
846 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;