reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
+ reset_context.src = adev->enable_mes ?
+ AMDGPU_RESET_SRC_MES :
+ AMDGPU_RESET_SRC_HWS;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
return r;
}
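
For context, AMDGPU_RESET_SRC_MES and AMDGPU_RESET_SRC_HWS come from the reset-source enum in amdgpu_reset.h. A minimal sketch of that enum, assuming the upstream member set (members beyond the two used above, and the comments, are illustrative):

enum amdgpu_reset_srcs {
	AMDGPU_RESET_SRC_UNKNOWN,
	AMDGPU_RESET_SRC_JOB,
	AMDGPU_RESET_SRC_RAS,
	AMDGPU_RESET_SRC_MES,	/* MES-scheduled queue hang */
	AMDGPU_RESET_SRC_HWS,	/* hardware scheduler hang */
	AMDGPU_RESET_SRC_USER,
};

amdgpu_reset_get_desc() later turns this source into the human-readable cause string attached to the SMI event.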
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context)
{
int r = 0;
if (adev->kfd.dev)
- r = kgd2kfd_pre_reset(adev->kfd.dev);
+ r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
return r;
}
};
struct amdgpu_device;
+struct amdgpu_reset_context;
enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
-int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
+ struct amdgpu_reset_context *reset_context);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
-int kgd2kfd_pre_reset(struct kfd_dev *kfd);
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
return 0;
}
-static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
return 0;
}
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
- amdgpu_amdkfd_pre_reset(tmp_adev);
+ amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
* Mark these ASICs to be reset as untracked first
kfree(kfd);
}
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+int kgd2kfd_pre_reset(struct kfd_dev *kfd,
+ struct amdgpu_reset_context *reset_context)
{
struct kfd_node *node;
int i;
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
- kfd_smi_event_update_gpu_reset(node, false);
+ kfd_smi_event_update_gpu_reset(node, false, reset_context);
node->dqm->ops.pre_reset(node->dqm);
}
for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i];
atomic_set(&node->sram_ecc_flag, 0);
- kfd_smi_event_update_gpu_reset(node, true);
+ kfd_smi_event_update_gpu_reset(node, true, NULL);
}
return 0;
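
Note the asymmetry between the two hunks above: only the pre-reset event has a reset context in scope, so only it carries a cause; the post-reset update passes NULL and its event stays cause-less. Condensed (illustrative):

/* Pre-reset: cause attached.  Post-reset: NULL context, empty cause. */
kfd_smi_event_update_gpu_reset(node, false, reset_context);
kfd_smi_event_update_gpu_reset(node, true, NULL);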
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"
+#include "amdgpu_reset.h"
struct kfd_smi_client {
struct list_head list;
add_event_to_kfifo(pid, dev, event, fifo_in, len);
}
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context)
{
unsigned int event;
+ char reset_cause[64];
if (post_reset) {
	event = KFD_SMI_EVENT_GPU_POST_RESET;
} else {
	event = KFD_SMI_EVENT_GPU_PRE_RESET;
	++(dev->reset_seq_num);
}
- kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
+
+ memset(reset_cause, 0, sizeof(reset_cause));
+
+ if (reset_context)
+ amdgpu_reset_get_desc(reset_context, reset_cause,
+ sizeof(reset_cause));
+
+ kfd_smi_event_add(0, dev, event, "%x %s\n",
+ dev->reset_seq_num,
+ reset_cause);
}
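
With this change, the fifo line a client reads becomes "<event> <reset_seq_num> <cause>", assuming kfd_smi_event_add() keeps prefixing the payload with the event id in hex. A hypothetical userspace parser, given one line already read from the SMI fd:

/* Hypothetical parser for the extended pre/post reset payload.
 * Example input: "3 1 <cause text>" -- event id and sequence number in
 * hex, then the optional cause produced by amdgpu_reset_get_desc().
 */
#include <stdio.h>

static int parse_gpu_reset_event(const char *line)
{
	unsigned int event, seq;
	char cause[64] = "";

	/* Cause may be empty (post-reset events pass a NULL context). */
	if (sscanf(line, "%x %x %63[^\n]", &event, &seq, cause) < 2)
		return -1;

	printf("event=%#x seq=%#x cause='%s'\n", event, seq, cause);
	return 0;
}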
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
#ifndef KFD_SMI_EVENTS_H_INCLUDED
#define KFD_SMI_EVENTS_H_INCLUDED
+struct amdgpu_reset_context;
+
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd);
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid);
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask);
-void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset);
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
+ struct amdgpu_reset_context *reset_context);
void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
unsigned long address, bool write_fault,
ktime_t ts);
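
On the consumer side, a client subscribes to these events through the KFD SMI fd. A hypothetical sketch, assuming the fd was already obtained via the AMDKFD_IOC_SMI_EVENTS ioctl and using the uapi names from linux/kfd_ioctl.h:

/* Hypothetical: enable pre/post reset notifications on an SMI event fd.
 * The kernel side reads a raw __u64 event mask from the write buffer.
 */
#include <stdint.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

static int subscribe_reset_events(int smi_fd)
{
	uint64_t mask =
		KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_GPU_PRE_RESET) |
		KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_GPU_POST_RESET);

	return write(smi_fd, &mask, sizeof(mask)) == sizeof(mask) ? 0 : -1;
}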