{
struct ivpu_device *vdev = seq_to_ivpu(s);
- seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
+ seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending));
return 0;
}
fw->dvfs_mode = dvfs_mode;
- ivpu_pm_schedule_recovery(vdev);
+ ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+ if (ret)
+ return ret;
return size;
}
return ret;
ivpu_hw_profiling_freq_drive(vdev, enable);
- ivpu_pm_schedule_recovery(vdev);
+
+ ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+ if (ret)
+ return ret;
return size;
}
ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
+ int ret;
if (!size)
return -EINVAL;
- ivpu_pm_schedule_recovery(vdev);
+ ret = ivpu_rpm_get(vdev);
+ if (ret)
+ return ret;
+
+ ivpu_pm_trigger_recovery(vdev, "debugfs");
+ flush_work(&vdev->pm->recovery_work);
+ ivpu_rpm_put(vdev);
return size;
}
static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "WDT NCE irq\n");
-
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}
static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "WDT MSS irq\n");
-
ivpu_hw_wdt_disable(vdev);
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}
static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
- ivpu_err_ratelimited(vdev, "NOC Firewall irq\n");
-
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}
/* Handler for IRQs from VPU core (irqV) */
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);
if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
return true;
}
static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
{
/* TODO: For LNN hang consider engine reset instead of full recovery */
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
}
static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
{
ivpu_hw_wdt_disable(vdev);
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
}
static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
{
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
}
/* Handler for IRQs from VPU core (irqV) */
REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
return true;
}
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
vdev->timeout.jsm);
- if (hb_ret == -ETIMEDOUT) {
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
- }
+ if (hb_ret == -ETIMEDOUT)
+ ivpu_pm_trigger_recovery(vdev, "IPC timeout");
return ret;
}
goto err_destroy_job;
}
+ down_read(&vdev->pm->reset_lock);
ret = ivpu_job_submit(job);
+ up_read(&vdev->pm->reset_lock);
if (ret)
goto err_signal_fence;
void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
{
- bool schedule_recovery = false;
u32 *event;
u32 ssid;
ivpu_mmu_dump_event(vdev, event);
ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
- if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
- schedule_recovery = true;
- else
- ivpu_mmu_user_context_mark_invalid(vdev, ssid);
- }
+ if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+ ivpu_pm_trigger_recovery(vdev, "MMU event");
+ return;
+ }
- if (schedule_recovery)
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+ }
}
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
int ret;
+ ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
+
+ ret = pm_runtime_resume_and_get(vdev->drm.dev);
+ if (ret)
+ ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
+
+ ivpu_fw_log_dump(vdev);
+
retry:
ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
+ pm_runtime_mark_last_busy(vdev->drm.dev);
+ pm_runtime_put_autosuspend(vdev->drm.dev);
}
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
{
- struct ivpu_pm_info *pm = vdev->pm;
+ ivpu_err(vdev, "Recovery triggered by %s\n", reason);
if (ivpu_disable_recovery) {
ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
return;
}
- /* Schedule recovery if it's not in progress */
- if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
- ivpu_hw_irq_disable(vdev);
- queue_work(system_long_wq, &pm->recovery_work);
+ /* Trigger recovery if it's not in progress */
+ if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
+ ivpu_hw_diagnose_failure(vdev);
+ ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
+ queue_work(system_long_wq, &vdev->pm->recovery_work);
}
}
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
- unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
- ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
- ivpu_hw_diagnose_failure(vdev);
-
- ivpu_pm_schedule_recovery(vdev);
+ ivpu_pm_trigger_recovery(vdev, "TDR");
}
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
bool hw_is_idle = true;
int ret;
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+ drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
+
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
{
struct ivpu_device *vdev = pci_get_drvdata(pdev);
- pm_runtime_get_sync(vdev->drm.dev);
-
ivpu_dbg(vdev, PM, "Pre-reset..\n");
atomic_inc(&vdev->pm->reset_counter);
- atomic_set(&vdev->pm->in_reset, 1);
+ atomic_set(&vdev->pm->reset_pending, 1);
+
+ pm_runtime_get_sync(vdev->drm.dev);
+ down_write(&vdev->pm->reset_lock);
ivpu_prepare_for_reset(vdev);
ivpu_hw_reset(vdev);
ivpu_pm_prepare_cold_boot(vdev);
ret = ivpu_resume(vdev);
if (ret)
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
- atomic_set(&vdev->pm->in_reset, 0);
+ up_write(&vdev->pm->reset_lock);
+ atomic_set(&vdev->pm->reset_pending, 0);
ivpu_dbg(vdev, PM, "Post-reset done.\n");
+ pm_runtime_mark_last_busy(vdev->drm.dev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
pm->vdev = vdev;
pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
- atomic_set(&pm->in_reset, 0);
+ init_rwsem(&pm->reset_lock);
+ atomic_set(&pm->reset_pending, 0);
+ atomic_set(&pm->reset_counter, 0);
+
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
#ifndef __IVPU_PM_H__
#define __IVPU_PM_H__
+#include <linux/rwsem.h>
#include <linux/types.h>
struct ivpu_device;
struct ivpu_device *vdev;
struct delayed_work job_timeout_work;
struct work_struct recovery_work;
- atomic_t in_reset;
+ struct rw_semaphore reset_lock;
atomic_t reset_counter;
+ atomic_t reset_pending;
bool is_warmboot;
u32 suspend_reschedule_counter;
};
int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason);
void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);