accel/amdxdna: Enhance power management settings
authorLizhi Hou <lizhi.hou@amd.com>
Fri, 13 Dec 2024 23:29:31 +0000 (15:29 -0800)
committerMario Limonciello <mario.limonciello@amd.com>
Mon, 16 Dec 2024 21:50:32 +0000 (15:50 -0600)
Add a SET_STATE ioctl to configure the device power mode for aie2 devices.
Three modes are supported initially.

POWER_MODE_DEFAULT: Enable clock gating and set the DPM (Dynamic Power
Management) level to the value chosen by the resource solver, or to the
maximum DPM level the device supports.

POWER_MODE_HIGH: Enable clock gating and set the DPM level to the maximum
DPM level the device supports.

POWER_MODE_TURBO: Disable clock gating and set the DPM level to the maximum
DPM level the device supports.

Disabling clock gating means all clocks always run at full speed, while the
actual clock frequencies are determined by the DPM level that has been set.
The driver initially sets the power mode to POWER_MODE_DEFAULT.
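
For illustration only (not part of this patch), a minimal userspace sketch of
the new interface could look like the following. It assumes the installed uapi
header is reachable as <drm/amdxdna_accel.h> and that the device node is
/dev/accel/accel0; the SET_STATE ioctl is restricted to root (DRM_ROOT_ONLY).

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <drm/amdxdna_accel.h>

int main(void)
{
	/* Payload for DRM_AMDXDNA_SET_POWER_MODE; pad must be zero (MBZ). */
	struct amdxdna_drm_set_power_mode power = { .power_mode = POWER_MODE_HIGH };
	struct amdxdna_drm_set_state state = {
		.param = DRM_AMDXDNA_SET_POWER_MODE,
		.buffer_size = sizeof(power),
		.buffer = (uintptr_t)&power,
	};
	int fd, ret;

	fd = open("/dev/accel/accel0", O_RDWR);	/* assumed device node path */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	ret = ioctl(fd, DRM_IOCTL_AMDXDNA_SET_STATE, &state);
	if (ret)
		perror("DRM_IOCTL_AMDXDNA_SET_STATE");

	close(fd);
	return ret ? 1 : 0;
}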

Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@amd.com>
Co-developed-by: George Yang <George.Yang@amd.com>
Signed-off-by: George Yang <George.Yang@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241213232933.1545388-4-lizhi.hou@amd.com
18 files changed:
drivers/accel/amdxdna/Makefile
drivers/accel/amdxdna/TODO
drivers/accel/amdxdna/aie2_ctx.c
drivers/accel/amdxdna/aie2_message.c
drivers/accel/amdxdna/aie2_pci.c
drivers/accel/amdxdna/aie2_pci.h
drivers/accel/amdxdna/aie2_pm.c [new file with mode: 0644]
drivers/accel/amdxdna/aie2_smu.c
drivers/accel/amdxdna/aie2_solver.c
drivers/accel/amdxdna/aie2_solver.h
drivers/accel/amdxdna/amdxdna_pci_drv.c
drivers/accel/amdxdna/amdxdna_pci_drv.h
drivers/accel/amdxdna/npu1_regs.c
drivers/accel/amdxdna/npu2_regs.c
drivers/accel/amdxdna/npu4_regs.c
drivers/accel/amdxdna/npu5_regs.c
drivers/accel/amdxdna/npu6_regs.c
include/uapi/drm/amdxdna_accel.h

index 6baf181298decf76973d7a7dfc3050bb81f87165..0e9adf6890a015cc1b9b098c0801ad741bf4fa9c 100644 (file)
@@ -5,6 +5,7 @@ amdxdna-y := \
        aie2_error.o \
        aie2_message.o \
        aie2_pci.o \
+       aie2_pm.o \
        aie2_psp.o \
        aie2_smu.o \
        aie2_solver.o \
index de4e1dbc8868ce1412f6cc4d1015020e0fd971be..5119bccd1917a1e0abf18dd736da1d934df06dd4 100644 (file)
@@ -1,4 +1,3 @@
 - Add import and export BO support
 - Add debugfs support
 - Add debug BO support
-- Improve power management
index 7218f751144c580a70647a4e17287dc5f4393bd9..cdeead75c6f58e616334cb17d9ad32ee9a0f1cf2 100644 (file)
@@ -519,6 +519,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
        struct drm_gpu_scheduler *sched;
        struct amdxdna_hwctx_priv *priv;
        struct amdxdna_gem_obj *heap;
+       struct amdxdna_dev_hdl *ndev;
        int i, ret;
 
        priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
@@ -613,6 +614,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
        }
 
        hwctx->status = HWCTX_STAT_INIT;
+       ndev = xdna->dev_handle;
+       ndev->hwctx_num++;
 
        XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
 
@@ -642,10 +645,13 @@ free_priv:
 
 void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 {
+       struct amdxdna_dev_hdl *ndev;
        struct amdxdna_dev *xdna;
        int idx;
 
        xdna = hwctx->client->xdna;
+       ndev = xdna->dev_handle;
+       ndev->hwctx_num--;
        drm_sched_wqueue_stop(&hwctx->priv->sched);
 
        /* Now, scheduler will not send command to device. */
index c90f5e2fc11518fa659a525138bd813d51917a82..640330fd5cbda999412db06e0b5a79ad873704bb 100644 (file)
@@ -71,11 +71,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
 int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
 {
        DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
+       int ret;
 
        req.type = type;
        req.value = value;
 
-       return aie2_send_mgmt_msg_wait(ndev, &msg);
+       ret = aie2_send_mgmt_msg_wait(ndev, &msg);
+       if (ret) {
+               XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);
+               return ret;
+       }
+
+       return 0;
 }
 
 int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
index 25c4215cc456f5b10b18654b680ca8d36d2935b1..c71b5e434deed3cfd8e3fed08a5c29249e4abd9b 100644 (file)
@@ -110,28 +110,26 @@ static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
        return 0;
 }
 
-static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+                    enum rt_config_category category, u32 *val)
 {
-       const struct rt_config *cfg = &ndev->priv->rt_config;
-       u64 value;
+       const struct rt_config *cfg;
+       u32 value;
        int ret;
 
-       ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
-       if (ret) {
-               XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
-                        cfg->type, cfg->value);
-               return ret;
-       }
+       for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
+               if (cfg->category != category)
+                       continue;
 
-       ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
-       if (ret) {
-               XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
-               return ret;
+               value = val ? *val : cfg->value;
+               ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
+               if (ret) {
+                       XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
+                                cfg->type, value);
+                       return ret;
+               }
        }
 
-       if (value != cfg->value)
-               return -EINVAL;
-
        return 0;
 }
 
@@ -164,7 +162,7 @@ static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
                return ret;
        }
 
-       ret = aie2_runtime_cfg(ndev);
+       ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
        if (ret) {
                XDNA_ERR(ndev->xdna, "Runtime config failed");
                return ret;
@@ -258,9 +256,25 @@ static int aie2_xrs_unload(void *cb_arg)
        return ret;
 }
 
+static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
+{
+       struct amdxdna_dev *xdna = to_xdna_dev(ddev);
+       struct amdxdna_dev_hdl *ndev;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+       ndev = xdna->dev_handle;
+       ndev->dft_dpm_level = dpm_level;
+       if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == dpm_level)
+               return 0;
+
+       return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+}
+
 static struct xrs_action_ops aie2_xrs_actions = {
        .load = aie2_xrs_load,
        .unload = aie2_xrs_unload,
+       .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
 };
 
 static void aie2_hw_stop(struct amdxdna_dev *xdna)
@@ -355,6 +369,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
                goto stop_psp;
        }
 
+       ret = aie2_pm_init(ndev);
+       if (ret) {
+               XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
+               goto destroy_mgmt_chann;
+       }
+
        ret = aie2_mgmt_fw_init(ndev);
        if (ret) {
                XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
@@ -481,10 +501,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
        }
        ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
 
-       xrs_cfg.clk_list.num_levels = 3;
-       xrs_cfg.clk_list.cu_clk_list[0] = 0;
-       xrs_cfg.clk_list.cu_clk_list[1] = 800;
-       xrs_cfg.clk_list.cu_clk_list[2] = 1000;
+       xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
+       for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
+               xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
        xrs_cfg.sys_eff_factor = 1;
        xrs_cfg.ddev = &xdna->ddev;
        xrs_cfg.actions = &aie2_xrs_actions;
@@ -658,6 +677,22 @@ static int aie2_get_firmware_version(struct amdxdna_client *client,
        return 0;
 }
 
+static int aie2_get_power_mode(struct amdxdna_client *client,
+                              struct amdxdna_drm_get_info *args)
+{
+       struct amdxdna_drm_get_power_mode mode = {};
+       struct amdxdna_dev *xdna = client->xdna;
+       struct amdxdna_dev_hdl *ndev;
+
+       ndev = xdna->dev_handle;
+       mode.power_mode = ndev->pw_mode;
+
+       if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
+               return -EFAULT;
+
+       return 0;
+}
+
 static int aie2_get_clock_metadata(struct amdxdna_client *client,
                                   struct amdxdna_drm_get_info *args)
 {
@@ -671,11 +706,11 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
        if (!clock)
                return -ENOMEM;
 
-       memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
-              sizeof(clock->mp_npu_clock.name));
-       clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
-       memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock->h_clock.name));
-       clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
+       snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
+                "MP-NPU Clock");
+       clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
+       snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
+       clock->h_clock.freq_mhz = ndev->hclk_freq;
 
        if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
                ret = -EFAULT;
@@ -773,6 +808,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
        case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
                ret = aie2_get_firmware_version(client, args);
                break;
+       case DRM_AMDXDNA_GET_POWER_MODE:
+               ret = aie2_get_power_mode(client, args);
+               break;
        default:
                XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
                ret = -EOPNOTSUPP;
@@ -783,12 +821,61 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
        return ret;
 }
 
+static int aie2_set_power_mode(struct amdxdna_client *client,
+                              struct amdxdna_drm_set_state *args)
+{
+       struct amdxdna_drm_set_power_mode power_state;
+       enum amdxdna_power_mode_type power_mode;
+       struct amdxdna_dev *xdna = client->xdna;
+
+       if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
+                          sizeof(power_state))) {
+               XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
+               return -EFAULT;
+       }
+
+       if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
+               return -EINVAL;
+
+       power_mode = power_state.power_mode;
+       if (power_mode > POWER_MODE_TURBO) {
+               XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
+               return -EINVAL;
+       }
+
+       return aie2_pm_set_mode(xdna->dev_handle, power_mode);
+}
+
+static int aie2_set_state(struct amdxdna_client *client,
+                         struct amdxdna_drm_set_state *args)
+{
+       struct amdxdna_dev *xdna = client->xdna;
+       int ret, idx;
+
+       if (!drm_dev_enter(&xdna->ddev, &idx))
+               return -ENODEV;
+
+       switch (args->param) {
+       case DRM_AMDXDNA_SET_POWER_MODE:
+               ret = aie2_set_power_mode(client, args);
+               break;
+       default:
+               XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
+               ret = -EOPNOTSUPP;
+               break;
+       }
+
+       drm_dev_exit(idx);
+       return ret;
+}
+
 const struct amdxdna_dev_ops aie2_ops = {
        .init           = aie2_init,
        .fini           = aie2_fini,
        .resume         = aie2_hw_start,
        .suspend        = aie2_hw_stop,
        .get_aie_info   = aie2_get_info,
+       .set_aie_state  = aie2_set_state,
        .hwctx_init     = aie2_hwctx_init,
        .hwctx_fini     = aie2_hwctx_fini,
        .hwctx_config   = aie2_hwctx_config,
index 1c6f07d9b805dcf145d5d1f63775c2462a1f0d33..8c17b74654ce6cea76130fc33a4b0eca5b35a562 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef _AIE2_PCI_H_
 #define _AIE2_PCI_H_
 
+#include <drm/amdxdna_accel.h>
 #include <linux/semaphore.h>
 
 #include "amdxdna_mailbox.h"
@@ -48,9 +49,6 @@
        pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
 })
 
-#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_mpnpuclk_freq_max)
-#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
-
 enum aie2_smu_reg_idx {
        SMU_CMD_REG = 0,
        SMU_ARG_REG,
@@ -112,14 +110,20 @@ struct aie_metadata {
        struct aie_tile_metadata shim;
 };
 
-struct clock_entry {
-       char name[16];
-       u32 freq_mhz;
+enum rt_config_category {
+       AIE2_RT_CFG_INIT,
+       AIE2_RT_CFG_CLK_GATING,
 };
 
 struct rt_config {
        u32     type;
        u32     value;
+       u32     category;
+};
+
+struct dpm_clk_freq {
+       u32     npuclk;
+       u32     hclk;
 };
 
 /*
@@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
 };
 
 enum aie2_dev_status {
+       AIE2_DEV_UNINIT,
        AIE2_DEV_INIT,
        AIE2_DEV_START,
 };
@@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
        u32                             total_col;
        struct aie_version              version;
        struct aie_metadata             metadata;
-       struct clock_entry              mp_npu_clock;
-       struct clock_entry              h_clock;
+
+       /* power management and clock*/
+       enum amdxdna_power_mode_type    pw_mode;
+       u32                             dpm_level;
+       u32                             dft_dpm_level;
+       u32                             max_dpm_level;
+       u32                             clk_gating;
+       u32                             npuclk_freq;
+       u32                             hclk_freq;
 
        /* Mailbox and the management channel */
        struct mailbox                  *mbox;
@@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
        struct async_events             *async_events;
 
        enum aie2_dev_status            dev_status;
+       u32                             hwctx_num;
 };
 
 #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
@@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
        u32     offset;
 };
 
+struct aie2_hw_ops {
+       int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+};
+
 struct amdxdna_dev_priv {
        const char                      *fw_path;
        u64                             protocol_major;
        u64                             protocol_minor;
-       struct rt_config                rt_config;
+       const struct rt_config          *rt_config;
+       const struct dpm_clk_freq       *dpm_clk_tbl;
+
 #define COL_ALIGN_NONE   0
 #define COL_ALIGN_NATURE 1
        u32                             col_align;
@@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
        struct aie2_bar_off_pair        sram_offs[SRAM_MAX_INDEX];
        struct aie2_bar_off_pair        psp_regs_off[PSP_MAX_REGS];
        struct aie2_bar_off_pair        smu_regs_off[SMU_MAX_REGS];
-       u32                             smu_mpnpuclk_freq_max;
-       u32                             smu_hclk_freq_max;
+       struct aie2_hw_ops              hw_ops;
 };
 
 extern const struct amdxdna_dev_ops aie2_ops;
 
+int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
+                    enum rt_config_category category, u32 *val);
+
+/* aie2 npu hw config */
+extern const struct dpm_clk_freq npu1_dpm_clk_table[];
+extern const struct dpm_clk_freq npu4_dpm_clk_table[];
+extern const struct rt_config npu1_default_rt_cfg[];
+extern const struct rt_config npu4_default_rt_cfg[];
+
 /* aie2_smu.c */
 int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
 void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
+
+/* aie2_pm.c */
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
 
 /* aie2_psp.c */
 struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/aie2_pm.c
new file mode 100644 (file)
index 0000000..426c38f
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Advanced Micro Devices, Inc.
+ */
+
+#include <drm/amdxdna_accel.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/gpu_scheduler.h>
+
+#include "aie2_pci.h"
+#include "amdxdna_pci_drv.h"
+
+#define AIE2_CLK_GATING_ENABLE 1
+#define AIE2_CLK_GATING_DISABLE        0
+
+static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
+{
+       int ret;
+
+       ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
+       if (ret)
+               return ret;
+
+       ndev->clk_gating = val;
+       return 0;
+}
+
+int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
+{
+       int ret;
+
+       if (ndev->dev_status != AIE2_DEV_UNINIT) {
+               /* Resume device */
+               ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
+               if (ret)
+                       return ret;
+
+               ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
+               if (ret)
+                       return ret;
+
+               return 0;
+       }
+
+       while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
+               ndev->max_dpm_level++;
+       ndev->max_dpm_level--;
+
+       ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
+       if (ret)
+               return ret;
+
+       ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
+       if (ret)
+               return ret;
+
+       ndev->pw_mode = POWER_MODE_DEFAULT;
+       ndev->dft_dpm_level = ndev->max_dpm_level;
+
+       return 0;
+}
+
+int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
+{
+       struct amdxdna_dev *xdna = ndev->xdna;
+       u32 clk_gating, dpm_level;
+       int ret;
+
+       drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+
+       if (ndev->pw_mode == target)
+               return 0;
+
+       switch (target) {
+       case POWER_MODE_TURBO:
+               if (ndev->hwctx_num) {
+                       XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
+                       return -EINVAL;
+               }
+
+               clk_gating = AIE2_CLK_GATING_DISABLE;
+               dpm_level = ndev->max_dpm_level;
+               break;
+       case POWER_MODE_HIGH:
+               clk_gating = AIE2_CLK_GATING_ENABLE;
+               dpm_level = ndev->max_dpm_level;
+               break;
+       case POWER_MODE_DEFAULT:
+               clk_gating = AIE2_CLK_GATING_ENABLE;
+               dpm_level = ndev->dft_dpm_level;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
+       if (ret)
+               return ret;
+
+       ret = aie2_pm_set_clk_gating(ndev, clk_gating);
+       if (ret)
+               return ret;
+
+       ndev->pw_mode = target;
+
+       return 0;
+}
index 91893d438da771555d83aa9b289eb2c8e4d96f5c..73388443c676788e17067505cb304ada23b61637 100644 (file)
 #define AIE2_SMU_POWER_OFF             0x4
 #define AIE2_SMU_SET_MPNPUCLK_FREQ     0x5
 #define AIE2_SMU_SET_HCLK_FREQ         0x6
+#define AIE2_SMU_SET_SOFT_DPMLEVEL     0x7
+#define AIE2_SMU_SET_HARD_DPMLEVEL     0x8
 
-static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
+static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
+                        u32 reg_arg, u32 *out)
 {
        u32 resp;
        int ret;
@@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
                return ret;
        }
 
+       if (out)
+               *out = readl(SMU_REG(ndev, SMU_OUT_REG));
+
        if (resp != SMU_RESULT_OK) {
                XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, resp);
                return -EINVAL;
@@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd, u32 reg_arg)
        return 0;
 }
 
-static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
+int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
+       u32 freq;
        int ret;
 
-       if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
-               XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
-               return -EINVAL;
+       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
+                           ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
+       if (ret) {
+               XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
+                        ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
        }
+       ndev->npuclk_freq = freq;
 
-       ndev->mp_npu_clock.freq_mhz = freq_mhz;
-       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
-       if (!ret)
-               XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", freq_mhz);
-
-       return ret;
-}
-
-static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, u32 freq_mhz)
-{
-       int ret;
-
-       if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
-               XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
-               return -EINVAL;
+       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
+                           ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
+       if (ret) {
+               XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
+                        ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
        }
+       ndev->hclk_freq = freq;
+       ndev->dpm_level = dpm_level;
 
-       ndev->h_clock.freq_mhz = freq_mhz;
-       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
-       if (!ret)
-               XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", freq_mhz);
+       XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+                ndev->npuclk_freq, ndev->hclk_freq);
 
-       return ret;
+       return 0;
 }
 
-int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
 {
        int ret;
 
-       ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
+       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, NULL);
        if (ret) {
-               XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
+               XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
+                        dpm_level, ret);
                return ret;
        }
 
-       ret = aie2_smu_set_mpnpu_clock_freq(ndev, SMU_MPNPUCLK_FREQ_MAX(ndev));
+       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, NULL);
        if (ret) {
-               XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
+               XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
+                        dpm_level, ret);
                return ret;
        }
-       snprintf(ndev->mp_npu_clock.name, sizeof(ndev->mp_npu_clock.name), "MP-NPU Clock");
 
-       ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
+       ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
+       ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
+       ndev->dpm_level = dpm_level;
+
+       XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
+                ndev->npuclk_freq, ndev->hclk_freq);
+
+       return 0;
+}
+
+int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
+{
+       int ret;
+
+       ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
        if (ret) {
-               XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
+               XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
                return ret;
        }
-       snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H Clock");
 
        return 0;
 }
@@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
 {
        int ret;
 
-       ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
+       ndev->priv->hw_ops.set_dpm(ndev, 0);
+       ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
        if (ret)
                XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
 }
index a537c66589a44e628064f752e8756ea2f9711cb0..1939625d6027e7179aa775f02de87c1f14dfa408 100644 (file)
@@ -25,6 +25,7 @@ struct solver_node {
 
        struct partition_node   *pt_node;
        void                    *cb_arg;
+       u32                     dpm_level;
        u32                     cols_len;
        u32                     start_cols[] __counted_by(cols_len);
 };
@@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
        return 0;
 }
 
+static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
+{
+       /*
+        * gops is retrieved from the xmodel, so it's always set
+        * fps and latency are the configurable params from the application
+        */
+       if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
+               return true;
+
+       return false;
+}
+
+static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
+{
+       struct solver_rgroup *rgp = &xrs->rgp;
+       struct cdo_parts *cdop = &req->cdo;
+       struct aie_qos *rqos = &req->rqos;
+       u32 freq, max_dpm_level, level;
+       struct solver_node *node;
+
+       max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
+       /* If no QoS parameters are passed, set it to the max DPM level */
+       if (!is_valid_qos_dpm_params(rqos)) {
+               level = max_dpm_level;
+               goto set_dpm;
+       }
+
+       /* Find one CDO group that meet the GOPs requirement. */
+       for (level = 0; level < max_dpm_level; level++) {
+               freq = xrs->cfg.clk_list.cu_clk_list[level];
+               if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
+                       break;
+       }
+
+       /* set the dpm level which fits all the sessions */
+       list_for_each_entry(node, &rgp->node_list, list) {
+               if (node->dpm_level > level)
+                       level = node->dpm_level;
+       }
+
+set_dpm:
+       *dpm_level = level;
+       return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
+}
+
 static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
 {
        struct solver_node *node;
@@ -159,12 +205,9 @@ static int get_free_partition(struct solver_state *xrs,
        pt_node->ncols = ncols;
 
        /*
-        * Before fully support latency in QoS, if a request
-        * specifies a non-zero latency value, it will not share
-        * the partition with other requests.
+        * Always set exclusive to false for now.
         */
-       if (req->rqos.latency)
-               pt_node->exclusive = true;
+       pt_node->exclusive = false;
 
        list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
        xrs->rgp.npartition_node++;
@@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
        struct xrs_action_load load_act;
        struct solver_node *snode;
        struct solver_state *xrs;
+       u32 dpm_level;
        int ret;
 
        xrs = (struct solver_state *)hdl;
@@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
        if (ret)
                goto free_node;
 
+       ret = set_dpm_level(xrs, req, &dpm_level);
+       if (ret)
+               goto free_node;
+
+       snode->dpm_level = dpm_level;
        snode->cb_arg = cb_arg;
 
        drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
index 9b1847bb46a6ab91257b702e2d3459609a99ba0d..a2e3c52229e9989b7509d78adc82986cc751766e 100644 (file)
@@ -99,6 +99,7 @@ struct clk_list_info {
 struct xrs_action_ops {
        int (*load)(void *cb_arg, struct xrs_action_load *action);
        int (*unload)(void *cb_arg);
+       int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
 };
 
 /*
index c00ca314d033cf647df0654a7afff4873b81501d..194e44fc243d191e3d831f12b890d3040e19ed17 100644 (file)
@@ -161,6 +161,24 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
        return ret;
 }
 
+static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+       struct amdxdna_client *client = filp->driver_priv;
+       struct amdxdna_dev *xdna = to_xdna_dev(dev);
+       struct amdxdna_drm_set_state *args = data;
+       int ret;
+
+       if (!xdna->dev_info->ops->set_aie_state)
+               return -EOPNOTSUPP;
+
+       XDNA_DBG(xdna, "Request parameter %u", args->param);
+       mutex_lock(&xdna->dev_lock);
+       ret = xdna->dev_info->ops->set_aie_state(client, args);
+       mutex_unlock(&xdna->dev_lock);
+
+       return ret;
+}
+
 static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
        /* Context */
        DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
@@ -174,6 +192,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
        DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
        /* AIE hardware */
        DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
+       DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
 };
 
 static const struct file_operations amdxdna_fops = {
index b62b63a660982c94df716f6d08bc3a9e008c8ac2..37848a8d8031b9552b2b894ab4800b8795f35d1f 100644 (file)
 #define XDNA_DBG(xdna, fmt, args...)   drm_dbg(&(xdna)->ddev, fmt, ##args)
 #define XDNA_INFO_ONCE(xdna, fmt, args...) drm_info_once(&(xdna)->ddev, fmt, ##args)
 
+#define XDNA_MBZ_DBG(xdna, ptr, sz)                                    \
+       ({                                                              \
+               int __i;                                                \
+               int __ret = 0;                                          \
+               u8 *__ptr = (u8 *)(ptr);                                \
+               for (__i = 0; __i < (sz); __i++) {                      \
+                       if (__ptr[__i]) {                               \
+                               XDNA_DBG(xdna, "MBZ check failed");     \
+                               __ret = -EINVAL;                        \
+                               break;                                  \
+                       }                                               \
+               }                                                       \
+               __ret;                                                  \
+       })
+
 #define to_xdna_dev(drm_dev) \
        ((struct amdxdna_dev *)container_of(drm_dev, struct amdxdna_dev, ddev))
 
@@ -22,6 +37,7 @@ extern const struct drm_driver amdxdna_drm_drv;
 struct amdxdna_client;
 struct amdxdna_dev;
 struct amdxdna_drm_get_info;
+struct amdxdna_drm_set_state;
 struct amdxdna_gem_obj;
 struct amdxdna_hwctx;
 struct amdxdna_sched_job;
@@ -42,6 +58,7 @@ struct amdxdna_dev_ops {
        void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
        int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
        int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
+       int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
 };
 
 /*
index f00c50461b09b244dd73496ea37b615a60636095..c8f4d1cac65d39e7fc05e2cd276d399aa148a553 100644 (file)
 #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
 #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
 
-#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
-#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu1_default_rt_cfg[] = {
+       { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+       { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 0 },
+};
 
-#define NPU1_MPNPUCLK_FREQ_MAX  600
-#define NPU1_HCLK_FREQ_MAX      1024
+const struct dpm_clk_freq npu1_dpm_clk_table[] = {
+       {400, 800},
+       {600, 1024},
+       {600, 1024},
+       {600, 1024},
+       {600, 1024},
+       {720, 1309},
+       {720, 1309},
+       {847, 1600},
+       { 0 }
+};
 
 const struct amdxdna_dev_priv npu1_dev_priv = {
        .fw_path        = "amdnpu/1502_00/npu.sbin",
        .protocol_major = 0x5,
        .protocol_minor = 0x1,
-       .rt_config      = {NPU1_RT_CFG_TYPE_PDI_LOAD, NPU1_RT_CFG_VAL_PDI_LOAD_APP},
+       .rt_config      = npu1_default_rt_cfg,
+       .dpm_clk_tbl    = npu1_dpm_clk_table,
        .col_align      = COL_ALIGN_NONE,
        .mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
@@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
                DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
                DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
        },
-       .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
-       .smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
+       .hw_ops         = {
+               .set_dpm = npu1_set_dpm,
+       },
 };
 
 const struct amdxdna_dev_info dev_npu1_info = {
index 00cb381031d2e1990ce6562af0216af2c19f05fa..ac63131f9c7c36640c2590f615903159a4b67d43 100644 (file)
 #define NPU2_SMU_BAR_BASE      MMNPU_APERTURE4_BASE
 #define NPU2_SRAM_BAR_BASE     MMNPU_APERTURE1_BASE
 
-#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU2_MPNPUCLK_FREQ_MAX  1267
-#define NPU2_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu2_dev_priv = {
        .fw_path        = "amdnpu/17f0_00/npu.sbin",
        .protocol_major = 0x6,
        .protocol_minor = 0x1,
-       .rt_config      = {NPU2_RT_CFG_TYPE_PDI_LOAD, NPU2_RT_CFG_VAL_PDI_LOAD_APP},
+       .rt_config      = npu4_default_rt_cfg,
+       .dpm_clk_tbl    = npu4_dpm_clk_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
                DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
                DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
        },
-       .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
-       .smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
+       .hw_ops =        {
+               .set_dpm = npu4_set_dpm,
+       },
 };
 
 const struct amdxdna_dev_info dev_npu2_info = {
index b6dae9667cca2436094067ef62eda9849fbbd8fc..a713ac18adfcab4ad94e641bf381ad5c254fa492 100644 (file)
 #define NPU4_SMU_BAR_BASE      MMNPU_APERTURE4_BASE
 #define NPU4_SRAM_BAR_BASE     MMNPU_APERTURE1_BASE
 
-#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
+const struct rt_config npu4_default_rt_cfg[] = {
+       { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
+       { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
+       { 0 },
+};
 
-#define NPU4_MPNPUCLK_FREQ_MAX  1267
-#define NPU4_HCLK_FREQ_MAX      1800
+const struct dpm_clk_freq npu4_dpm_clk_table[] = {
+       {396, 792},
+       {600, 1056},
+       {792, 1152},
+       {975, 1267},
+       {975, 1267},
+       {1056, 1408},
+       {1152, 1584},
+       {1267, 1800},
+       { 0 }
+};
 
 const struct amdxdna_dev_priv npu4_dev_priv = {
        .fw_path        = "amdnpu/17f0_10/npu.sbin",
        .protocol_major = 0x6,
        .protocol_minor = 0x1,
-       .rt_config      = {NPU4_RT_CFG_TYPE_PDI_LOAD, NPU4_RT_CFG_VAL_PDI_LOAD_APP},
+       .rt_config      = npu4_default_rt_cfg,
+       .dpm_clk_tbl    = npu4_dpm_clk_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
@@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
                DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
                DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
        },
-       .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
-       .smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
+       .hw_ops         = {
+               .set_dpm = npu4_set_dpm,
+       },
 };
 
 const struct amdxdna_dev_info dev_npu4_info = {
index bed1baf8e1600872e7dcc13552bc7c7be18ecea6..67a5d5bc8a4968ec0128015261eb1111b87de923 100644 (file)
 #define NPU5_SMU_BAR_BASE      MMNPU_APERTURE4_BASE
 #define NPU5_SRAM_BAR_BASE     MMNPU_APERTURE1_BASE
 
-#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU5_MPNPUCLK_FREQ_MAX  1267
-#define NPU5_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu5_dev_priv = {
        .fw_path        = "amdnpu/17f0_11/npu.sbin",
        .protocol_major = 0x6,
        .protocol_minor = 0x1,
-       .rt_config      = {NPU5_RT_CFG_TYPE_PDI_LOAD, NPU5_RT_CFG_VAL_PDI_LOAD_APP},
+       .rt_config      = npu4_default_rt_cfg,
+       .dpm_clk_tbl    = npu4_dpm_clk_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
@@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
                DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
                DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
        },
-       .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
-       .smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
+       .hw_ops         = {
+               .set_dpm = npu4_set_dpm,
+       },
 };
 
 const struct amdxdna_dev_info dev_npu5_info = {
index d1168fc55533d50b6161c73959db334860998e42..f46c760cefc713acf58b6452a416b004252e3d03 100644 (file)
 #define NPU6_SMU_BAR_BASE      MMNPU_APERTURE4_BASE
 #define NPU6_SRAM_BAR_BASE     MMNPU_APERTURE1_BASE
 
-#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
-#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
-
-#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
-#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
-
-#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
-#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
-
-#define NPU6_MPNPUCLK_FREQ_MAX  1267
-#define NPU6_HCLK_FREQ_MAX      1800
-
 const struct amdxdna_dev_priv npu6_dev_priv = {
        .fw_path        = "amdnpu/17f0_10/npu.sbin",
        .protocol_major = 0x6,
        .protocol_minor = 12,
-       .rt_config      = {NPU6_RT_CFG_TYPE_PDI_LOAD, NPU6_RT_CFG_VAL_PDI_LOAD_APP},
+       .rt_config      = npu4_default_rt_cfg,
+       .dpm_clk_tbl    = npu4_dpm_clk_table,
        .col_align      = COL_ALIGN_NATURE,
        .mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
        .mbox_size      = 0, /* Use BAR size */
@@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
                DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
                DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
        },
+       .hw_ops         = {
+               .set_dpm = npu4_set_dpm,
+       },
+
 };
 
 const struct amdxdna_dev_info dev_npu6_info = {
index 4f15e53a548d6b9aaee2c7280af2e793acef4ef5..e4edb52bc27b5d9d74eb388bc1bcab16d00d19c0 100644 (file)
@@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
        DRM_AMDXDNA_SYNC_BO,
        DRM_AMDXDNA_EXEC_CMD,
        DRM_AMDXDNA_GET_INFO,
+       DRM_AMDXDNA_SET_STATE,
 };
 
 /**
@@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
        __u64 errors;
 };
 
+enum amdxdna_power_mode_type {
+       POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
+       POWER_MODE_LOW,     /* Set frequency to lowest DPM */
+       POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
+       POWER_MODE_HIGH,    /* Set frequency to highest DPM */
+       POWER_MODE_TURBO,   /* Maximum power */
+};
+
+/**
+ * struct amdxdna_drm_get_power_mode - Get the configured power mode
+ * @power_mode: The mode type from enum amdxdna_power_mode_type
+ * @pad: Structure padding.
+ */
+struct amdxdna_drm_get_power_mode {
+       __u8 power_mode;
+       __u8 pad[7];
+};
+
 /**
  * struct amdxdna_drm_query_firmware_version - Query the firmware version
  * @major: The major version number
@@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
        DRM_AMDXDNA_QUERY_SENSORS,
        DRM_AMDXDNA_QUERY_HW_CONTEXTS,
        DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
+       DRM_AMDXDNA_GET_POWER_MODE,
 };
 
 /**
@@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
        __u64 buffer; /* in/out */
 };
 
+enum amdxdna_drm_set_param {
+       DRM_AMDXDNA_SET_POWER_MODE,
+       DRM_AMDXDNA_WRITE_AIE_MEM,
+       DRM_AMDXDNA_WRITE_AIE_REG,
+};
+
+/**
+ * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
+ * @param: Value in enum amdxdna_drm_set_param.
+ * @buffer_size: Size of the input param.
+ * @buffer: Pointer to the input param.
+ */
+struct amdxdna_drm_set_state {
+       __u32 param; /* in */
+       __u32 buffer_size; /* in */
+       __u64 buffer; /* in */
+};
+
+/**
+ * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware
+ * @power_mode: The sensor type from enum amdxdna_power_mode_type
+ * @pad: MBZ.
+ */
+struct amdxdna_drm_set_power_mode {
+       __u8 power_mode;
+       __u8 pad[7];
+};
+
 #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
        DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
                 struct amdxdna_drm_create_hwctx)
@@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
        DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
                 struct amdxdna_drm_get_info)
 
+#define DRM_IOCTL_AMDXDNA_SET_STATE \
+       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
+                struct amdxdna_drm_set_state)
+
 #if defined(__cplusplus)
 } /* extern c end */
 #endif