drm/amdkfd: enable single alu ops for gfx12
author Jonathan Kim <jonathan.kim@amd.com>
Mon, 21 Aug 2023 15:47:47 +0000 (11:47 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 2 May 2024 20:18:13 +0000 (16:18 -0400)
Debugging on GFX12 requires enabling precise ALU operations in order to
catch ALU exceptions.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Tested-by: Lancelot Six <lancelot.six@amd.com>
Reviewed-by: Eric Huang <jinhuieric.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
include/uapi/linux/kfd_ioctl.h
include/uapi/linux/kfd_sysfs.h

index d889e3545120a2aa2da5f15feae297c9fab3846e..45b1975b149a985f24b1ba29c97804049a79c2d3 100644 (file)
@@ -25,6 +25,7 @@
 #include "kfd_topology.h"
 #include <linux/file.h>
 #include <uapi/linux/kfd_ioctl.h>
+#include <uapi/linux/kfd_sysfs.h>
 
 #define MAX_WATCH_ADDRESSES    4
 
@@ -497,14 +498,26 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
        int i, r = 0, rewind_count = 0;
 
        for (i = 0; i < target->n_pdds; i++) {
-               if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
+               struct kfd_topology_device *topo_dev =
+                               kfd_topology_device_by_id(target->pdds[i]->dev->id);
+               uint32_t caps = topo_dev->node_props.capability;
+
+               /* Single mem-op mode requires the precise memory ops capability. */
+               if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
                        (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
                        *flags = prev_flags;
                        return -EACCES;
                }
+
+               /* Single ALU-op mode requires the precise ALU ops capability. */
+               if (!(caps & HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+                   (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
+                       *flags = prev_flags;
+                       return -EACCES;
+               }
        }
 
-       target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
+       target->dbg_flags = *flags;
        *flags = prev_flags;
        for (i = 0; i < target->n_pdds; i++) {
                struct kfd_process_device *pdd = target->pdds[i];
index 71bea1fcb83860c07f4d46e8e899bcbe7998626c..ba326b43bec5f67a14addbc376fd5ba277e9ccca 100644 (file)
@@ -1929,6 +1929,10 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
                if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
                        dev->node_props.capability |=
                                HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+
+               if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
+                       dev->node_props.capability |=
+                               HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
        }
 
        kfd_topology_set_dbg_firmware_support(dev);
index f8e9d3c1d117521f3d02dd381082430396a0c5f9..285a36601dc927dc4d7272d051ad1df82d21c918 100644 (file)
@@ -854,6 +854,7 @@ enum kfd_dbg_trap_address_watch_mode {
 /* Additional wave settings */
 enum kfd_dbg_trap_flags {
        KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1,
+       KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2,
 };
 
 /* Trap exceptions */
index a51b7331e0b4b6254191006db30f95736a8607e7..5e8d28617efad4b284c7eccfb48fb4e76f7e0e56 100644 (file)
 /* Old buggy user mode depends on this being 0 */
 #define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED 0x00080000
 
-#define HSA_CAP_MEM_EDCSUPPORTED               0x00100000
-#define HSA_CAP_RASEVENTNOTIFY                 0x00200000
-#define HSA_CAP_ASIC_REVISION_MASK             0x03c00000
-#define HSA_CAP_ASIC_REVISION_SHIFT            22
-#define HSA_CAP_SRAM_EDCSUPPORTED              0x04000000
-#define HSA_CAP_SVMAPI_SUPPORTED               0x08000000
-#define HSA_CAP_FLAGS_COHERENTHOSTACCESS       0x10000000
-#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED   0x20000000
-#define HSA_CAP_RESERVED                       0xe00f8000
+#define HSA_CAP_MEM_EDCSUPPORTED                               0x00100000
+#define HSA_CAP_RASEVENTNOTIFY                                 0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK                             0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT                            22
+#define HSA_CAP_SRAM_EDCSUPPORTED                              0x04000000
+#define HSA_CAP_SVMAPI_SUPPORTED                               0x08000000
+#define HSA_CAP_FLAGS_COHERENTHOSTACCESS                       0x10000000
+#define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED                  0x20000000
+#define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED    0x40000000
+#define HSA_CAP_RESERVED                                       0x800f8000
 
 /* debug_prop bits in node properties */
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK     0x0000000f