habanalabs: support clock gating enable/disable
author Oded Gabbay <oded.gabbay@gmail.com>
Sat, 9 May 2020 09:17:21 +0000 (12:17 +0300)
committer Oded Gabbay <oded.gabbay@gmail.com>
Tue, 19 May 2020 11:48:41 +0000 (14:48 +0300)
In Gaudi there is a feature of clock gating certain engines.
Therefore, add this property to the device structure.

In addition, due to a limitation of this feature, the driver needs to
dynamically enable or disable this feature during run-time. Therefore, add
ASIC interface functions to enable/disable this feature from the common
code.

Moreover, this feature must be turned off when the user wishes to debug the
ASIC by reading/writing registers and/or memory through the driver's
debugfs. Therefore, add an option to enable/disable clock gating via the
debugfs interface.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Documentation/ABI/testing/debugfs-driver-habanalabs
drivers/misc/habanalabs/debugfs.c
drivers/misc/habanalabs/device.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/habanalabs_drv.c

index 67e04f2d7e1da430f83ecb268b4ac02be19d070a..f6d9c2a8d52800c79a6f4563dfab2e3cf8a8e5d6 100644 (file)
@@ -8,6 +8,16 @@ Description:    Sets the device address to be used for read or write through
                 only when the IOMMU is disabled.
                 The acceptable value is a string that starts with "0x"
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
+Date:           May 2020
+KernelVersion:  5.8
+Contact:        oded.gabbay@gmail.com
+Description:    Allow the root user to disable/enable the clock gating
+                mechanism in Gaudi at runtime. Due to how Gaudi is built, the
+                clock gating needs to be disabled in order to access the
+                registers of the TPC and MME engines, which the user sometimes
+                needs during debug. Write 0 to disable, non-zero to enable.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
 Date:           Jan 2019
 KernelVersion:  5.1
index 37beff3096f84e508c8543b35fbad12b0d3869a1..3c8dcdfba20cf1312834599b8a6f62493f2e1d16 100644 (file)
@@ -970,6 +970,55 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
        return count;
 }
 
+/*
+ * hl_clk_gate_read - debugfs read handler for the clk_gate file
+ * @f: debugfs file; its inode's i_private holds the hl_dbg_device_entry.
+ * @buf: user buffer to fill.
+ * @count: size of @buf; no more than this many bytes are copied.
+ * @ppos: file position; any non-zero position is reported as EOF.
+ *
+ * Formats hdev->clock_gating as a decimal number followed by a newline.
+ * The terminating NUL is part of the readable data (strlen() + 1 bytes).
+ *
+ * Return: number of bytes copied, 0 on EOF, or the negative error returned
+ * by simple_read_from_buffer().
+ */
+static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       char tmp_buf[200];
+
+       if (*ppos)
+               return 0;
+
+       snprintf(tmp_buf, sizeof(tmp_buf), "%d\n", hdev->clock_gating);
+
+       /*
+        * Pass the caller's count, not the string length, so a short user
+        * buffer is never written past its end.
+        */
+       return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+                       strlen(tmp_buf) + 1);
+}
+
+/*
+ * hl_clk_gate_write - debugfs write handler for the clk_gate file
+ * @f: debugfs file; its inode's i_private holds the hl_dbg_device_entry.
+ * @buf: user buffer holding an unsigned decimal number.
+ * @count: number of bytes in @buf.
+ * @ppos: file position (unused).
+ *
+ * A non-zero value enables clock gating, zero disables it. The ASIC
+ * callbacks are optional and are skipped when not set.
+ *
+ * Return: @count on success, -EBUSY while the device is in reset, or the
+ * negative error from kstrtouint_from_user() on malformed input.
+ */
+static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
+                                    size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       u32 value;
+       ssize_t rc;
+
+       if (atomic_read(&hdev->in_reset)) {
+               dev_warn_ratelimited(hdev->dev,
+                               "Can't change clock gating during reset\n");
+               /*
+                * Report an error rather than 0: a zero-byte write is not a
+                * failure to userspace, which typically retries it forever.
+                */
+               return -EBUSY;
+       }
+
+       rc = kstrtouint_from_user(buf, count, 10, &value);
+       if (rc)
+               return rc;
+
+       /*
+        * The flag is set before the enable callback and cleared only after
+        * the disable callback.
+        * NOTE(review): presumably the callbacks consult hdev->clock_gating,
+        * so keep this ordering - confirm against the ASIC implementation.
+        */
+       if (value) {
+               hdev->clock_gating = 1;
+               if (hdev->asic_funcs->enable_clock_gating)
+                       hdev->asic_funcs->enable_clock_gating(hdev);
+       } else {
+               if (hdev->asic_funcs->disable_clock_gating)
+                       hdev->asic_funcs->disable_clock_gating(hdev);
+               hdev->clock_gating = 0;
+       }
+
+       return count;
+}
+
 static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
                                        size_t count, loff_t *ppos)
 {
@@ -1058,6 +1107,12 @@ static const struct file_operations hl_device_fops = {
        .write = hl_device_write
 };
 
+static const struct file_operations hl_clk_gate_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_clk_gate_read,       /* reports hdev->clock_gating */
+       .write = hl_clk_gate_write      /* non-zero enables, 0 disables */
+};
+
 static const struct file_operations hl_stop_on_err_fops = {
        .owner = THIS_MODULE,
        .read = hl_stop_on_err_read,
@@ -1201,6 +1256,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
                                dev_entry,
                                &hl_device_fops);
 
+       debugfs_create_file("clk_gate",
+                               0200,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_clk_gate_fops);
+
        debugfs_create_file("stop_on_err",
                                0644,
                                dev_entry->root,
index f618cff9a1674886a629604b9ea9e9d4c7416877..7ce4540648cf6b53255acb44bf3d1543fb0a2634 100644 (file)
@@ -603,6 +603,9 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
 
                hdev->in_debug = 0;
 
+               if (!hdev->hard_reset_pending)
+                       hdev->asic_funcs->enable_clock_gating(hdev);
+
                goto out;
        }
 
@@ -613,6 +616,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
                goto out;
        }
 
+       hdev->asic_funcs->disable_clock_gating(hdev);
        hdev->in_debug = 1;
 
 out:
index fb2ff82e0db5c883624a75d8fae7f1fc1ce9a319..56f415f9120a920275ce7a072d3a02f013f0e12c 100644 (file)
@@ -5018,6 +5018,16 @@ int goya_armcp_info_get(struct hl_device *hdev)
        return 0;
 }
 
+static void goya_enable_clock_gating(struct hl_device *hdev)
+{
+       /* Empty stub: nothing is done for Goya when gating is enabled. */
+}
+
+static void goya_disable_clock_gating(struct hl_device *hdev)
+{
+       /* Empty stub: nothing is done for Goya when gating is disabled. */
+}
+
 static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
                                struct seq_file *s)
 {
@@ -5239,6 +5249,8 @@ static const struct hl_asic_funcs goya_funcs = {
        .mmu_invalidate_cache = goya_mmu_invalidate_cache,
        .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
        .send_heartbeat = goya_send_heartbeat,
+       .enable_clock_gating = goya_enable_clock_gating,
+       .disable_clock_gating = goya_disable_clock_gating,
        .debug_coresight = goya_debug_coresight,
        .is_device_idle = goya_is_device_idle,
        .soft_reset_late_init = goya_soft_reset_late_init,
index d77410886a673f434cb3bd5dda776c991455a88e..b1c3a89c7f3861dc9f49e436726fdb70e011a2dd 100644 (file)
@@ -578,6 +578,8 @@ enum hl_pll_frequency {
  * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
  *                              ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to ArmCP and verify response.
+ * @enable_clock_gating: enable clock gating for reducing power consumption.
+ * @disable_clock_gating: disable clock gating for accessing registers on HBW.
  * @debug_coresight: perform certain actions on Coresight for debugging.
  * @is_device_idle: return true if device is idle, false otherwise.
  * @soft_reset_late_init: perform certain actions needed after soft reset.
@@ -678,6 +680,8 @@ struct hl_asic_funcs {
        void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
                        u32 asid, u64 va, u64 size);
        int (*send_heartbeat)(struct hl_device *hdev);
+       void (*enable_clock_gating)(struct hl_device *hdev);
+       void (*disable_clock_gating)(struct hl_device *hdev);
        int (*debug_coresight)(struct hl_device *hdev, void *data);
        bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
                                struct seq_file *s);
@@ -1408,6 +1412,7 @@ struct hl_device_idle_busy_ts {
  *                   huge pages.
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
+ * @clock_gating: is clock gating enabled.
  * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
  * @dma_mask: the dma mask that was set for this device
  * @in_debug: is device under debug. This, together with fpriv_list, enforces
@@ -1494,6 +1499,7 @@ struct hl_device {
        u8                              dram_default_page_mapping;
        u8                              pmmu_huge_range;
        u8                              init_done;
+       u8                              clock_gating;
        u8                              device_cpu_disabled;
        u8                              dma_mask;
        u8                              in_debug;
index 5e73b456facd95634fc5d80104910acaeec0235c..822acb5a00bb8ca16e26003b83e56c9c29a42e2c 100644 (file)
@@ -231,6 +231,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
        hdev->fw_loading = 1;
        hdev->cpu_queues_enable = 1;
        hdev->heartbeat = 1;
+       hdev->clock_gating = 1;
 
        hdev->reset_pcilink = 0;
 }