habanalabs: sync stream generic functionality
author: Ofir Bitton <obitton@habana.ai>
Thu, 14 May 2020 15:25:47 +0000 (18:25 +0300)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Fri, 24 Jul 2020 17:31:34 +0000 (20:31 +0300)
Currently, sync stream is limited to external queues only. We want to
remove this constraint by adding a new queue property dedicated to sync
stream. In addition, we move the initialization and reset methods to the
common code, since we can re-use them with slight changes.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/command_submission.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/hw_queue.c

index e99c1d126bd3520980ddd2f5b67605bd7ff89d4e..62dab99dda98e395fe74a85a86113d388433851c 100644 (file)
@@ -740,6 +740,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        struct hl_cs_job *job;
        struct hl_cs *cs;
        struct hl_cb *cb;
+       enum hl_queue_type q_type;
        u64 *signal_seq_arr = NULL, signal_seq;
        u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
        int rc;
@@ -772,9 +773,20 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        chunk = &cs_chunk_array[0];
        q_idx = chunk->queue_index;
+
+       /*
+        * Validate the user-supplied queue index before using it to
+        * index hw_queues_props, to avoid an out-of-bounds read.
+        */
+       if (q_idx >= HL_MAX_QUEUES) {
+               dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
+               rc = -EINVAL;
+               goto free_cs_chunk_array;
+       }
+
        hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+       q_type = hw_queue_prop->type;
 
-       if ((q_idx >= HL_MAX_QUEUES) ||
-                       (hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+       if (!hw_queue_prop->supports_sync_stream) {
                dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
                rc = -EINVAL;
                goto free_cs_chunk_array;
@@ -871,7 +873,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
        *cs_seq = cs->sequence;
 
-       job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+       job = hl_cs_allocate_job(hdev, q_type, true);
        if (!job) {
                dev_err(hdev->dev, "Failed to allocate a new job\n");
                rc = -ENOMEM;
index 9d9cbcd5a28a6660adb1916f3fd111a00d86595c..fc377c618af04ed7a00eed86ef379a8f56dd6c87 100644 (file)
@@ -345,10 +345,12 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].requires_kernel_cb = 1;
+                       prop->hw_queues_props[i].supports_sync_stream = 1;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                        prop->hw_queues_props[i].driver_only = 1;
                        prop->hw_queues_props[i].requires_kernel_cb = 0;
+                       prop->hw_queues_props[i].supports_sync_stream = 0;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                        prop->hw_queues_props[i].driver_only = 0;
@@ -357,6 +359,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
                        prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].requires_kernel_cb = 0;
+                       prop->hw_queues_props[i].supports_sync_stream = 0;
                }
        }
 
@@ -364,7 +367,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
                prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 
        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
-
+       prop->sync_stream_first_sob = 0;
+       prop->sync_stream_first_mon = 0;
        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = GAUDI_HBM_SIZE_32GB;
        prop->dram_end_address = prop->dram_base_address +
@@ -6296,44 +6300,6 @@ static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
        return gaudi_cq_assignment[cq_idx];
 }
 
-static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-       struct gaudi_device *gaudi = hdev->asic_specific;
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-       struct hl_hw_sob *hw_sob;
-       int sob, ext_idx = gaudi->ext_queue_idx++;
-
-       /*
-        * The external queues might not sit sequentially, hence use the
-        * real external queue index for the SOB/MON base id.
-        */
-       hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
-       hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
-       hw_queue->next_sob_val = 1;
-       hw_queue->curr_sob_offset = 0;
-
-       for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
-               hw_sob = &hw_queue->hw_sob[sob];
-               hw_sob->hdev = hdev;
-               hw_sob->sob_id = hw_queue->base_sob_id + sob;
-               hw_sob->q_idx = q_idx;
-               kref_init(&hw_sob->kref);
-       }
-}
-
-static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-
-       /*
-        * In case we got here due to a stuck CS, the refcnt might be bigger
-        * than 1 and therefore we reset it.
-        */
-       kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
-       hw_queue->curr_sob_offset = 0;
-       hw_queue->next_sob_val = 1;
-}
-
 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
 {
        return sizeof(struct packet_msg_short) +
@@ -6636,8 +6602,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .read_device_fw_version = gaudi_read_device_fw_version,
        .load_firmware_to_device = gaudi_load_firmware_to_device,
        .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
-       .ext_queue_init = gaudi_ext_queue_init,
-       .ext_queue_reset = gaudi_ext_queue_reset,
        .get_signal_cb_size = gaudi_get_signal_cb_size,
        .get_wait_cb_size = gaudi_get_wait_cb_size,
        .gen_signal_cb = gaudi_gen_signal_cb,
index 63baef1e4e990dc11ebd1d705a99815d18402e19..3958fe38c8ee0e47b76e78dffd841efd4796bc77 100644 (file)
@@ -234,7 +234,6 @@ struct gaudi_internal_qman_info {
  *                      engine.
  * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
  *                  Multi MSI is possible only with IOMMU enabled.
- * @ext_queue_idx: helper index for external queues initialization.
  * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
  *                    8-bit value so use u8.
  */
@@ -255,7 +254,6 @@ struct gaudi_device {
        u32                             events_stat_aggregate[GAUDI_EVENT_SIZE];
        u32                             hw_cap_initialized;
        u8                              multi_msi_mode;
-       u8                              ext_queue_idx;
        u8                              mmu_cache_inv_pi;
 };
 
index 6dccaec95ffbc256230baa0322c53f5cac1196bc..ff9e8a31ced4367459b794180a2510111a9d0ffe 100644 (file)
@@ -5156,16 +5156,6 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
        return cq_idx;
 }
 
-static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
-static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
 {
        return 0;
@@ -5279,8 +5269,6 @@ static const struct hl_asic_funcs goya_funcs = {
        .read_device_fw_version = goya_read_device_fw_version,
        .load_firmware_to_device = goya_load_firmware_to_device,
        .load_boot_fit_to_device = goya_load_boot_fit_to_device,
-       .ext_queue_init = goya_ext_queue_init,
-       .ext_queue_reset = goya_ext_queue_reset,
        .get_signal_cb_size = goya_get_signal_cb_size,
        .get_wait_cb_size = goya_get_wait_cb_size,
        .gen_signal_cb = goya_gen_signal_cb,
index 64d9b2dd3e19fff8414d671fc25e58bad848e4fc..8cd4b55d06084154112fa2b953226fd6f74f1f04 100644 (file)
 /* MMU */
 #define MMU_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
 
+/*
+ * HL_RSVD_SOBS - number of 'sync stream' sync objects reserved per QMAN stream
+ * HL_RSVD_MONS - number of 'sync stream' monitors reserved per QMAN stream
+ */
 #define HL_RSVD_SOBS                   4
 #define HL_RSVD_MONS                   2
 
@@ -141,11 +145,13 @@ struct hl_hw_sob {
  *               false otherwise.
  * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
  *                      queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: true if queue supports sync stream, false otherwise
  */
 struct hw_queue_properties {
        enum hl_queue_type      type;
        u8                      driver_only;
        u8                      requires_kernel_cb;
+       u8                      supports_sync_stream;
 };
 
 /**
@@ -245,6 +251,8 @@ struct hl_mmu_properties {
  * @cb_pool_cb_cnt: number of CBs in the CB pool.
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @tpc_enabled_mask: which TPCs are enabled.
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
  * @completion_queues_count: number of completion queues.
  */
 struct asic_fixed_properties {
@@ -286,6 +295,8 @@ struct asic_fixed_properties {
        u32                             cb_pool_cb_cnt;
        u32                             cb_pool_cb_size;
        u32                             max_pending_cs;
+       u16                             sync_stream_first_sob;
+       u16                             sync_stream_first_mon;
        u8                              tpc_enabled_mask;
        u8                              completion_queues_count;
 };
@@ -423,6 +434,7 @@ struct hl_cs_job;
  *         exist).
  * @curr_sob_offset: the id offset to the currently used SOB from the
  *                   HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: true if queue supports sync stream, false otherwise
  */
 struct hl_hw_queue {
        struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
@@ -441,6 +453,7 @@ struct hl_hw_queue {
        u16                     base_mon_id;
        u8                      valid;
        u8                      curr_sob_offset;
+       u8                      supports_sync_stream;
 };
 
 /**
@@ -603,8 +616,6 @@ enum hl_pll_frequency {
  *                          contained in registers
  * @load_firmware_to_device: load the firmware to the device's memory
  * @load_boot_fit_to_device: load boot fit to device's memory
- * @ext_queue_init: Initialize the given external queue.
- * @ext_queue_reset: Reset the given external queue.
  * @get_signal_cb_size: Get signal CB size.
  * @get_wait_cb_size: Get wait CB size.
  * @gen_signal_cb: Generate a signal CB.
@@ -707,8 +718,6 @@ struct hl_asic_funcs {
                                        enum hl_fw_component fwc);
        int (*load_firmware_to_device)(struct hl_device *hdev);
        int (*load_boot_fit_to_device)(struct hl_device *hdev);
-       void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
-       void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
        u32 (*get_signal_cb_size)(struct hl_device *hdev);
        u32 (*get_wait_cb_size)(struct hl_device *hdev);
        void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
@@ -1436,6 +1445,7 @@ struct hl_device_idle_busy_ts {
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  */
@@ -1523,6 +1533,7 @@ struct hl_device {
        u8                              cdev_sysfs_created;
        u8                              stop_on_err;
        u8                              supports_sync_stream;
+       u8                              sync_stream_queue_idx;
        u8                              supports_coresight;
        u8                              supports_soft_reset;
 
index 29b96d24edc2340af2007eb1e0fc75311825f6e1..27f0c34b63b9b03e5862c7a1740a5e4a25d68c95 100644 (file)
@@ -663,9 +663,6 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
        q->ci = 0;
        q->pi = 0;
 
-       if (!is_cpu_queue)
-               hdev->asic_funcs->ext_queue_init(hdev, q->hw_queue_id);
-
        return 0;
 
 free_queue:
@@ -732,6 +729,53 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
        return 0;
 }
 
+/*
+ * sync_stream_queue_init - initialize sync-stream resources for a queue
+ *
+ * Reserves this queue's range of sync objects (SOBs) and monitors, based
+ * on the ASIC's first free SOB/monitor and a per-device running queue
+ * index, and initializes the refcount of each reserved SOB.
+ */
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_hw_sob *hw_sob;
+       int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
+       hw_queue->base_sob_id =
+               prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+       hw_queue->base_mon_id =
+               prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+       hw_queue->next_sob_val = 1;
+       hw_queue->curr_sob_offset = 0;
+
+       for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+               hw_sob = &hw_queue->hw_sob[sob];
+               hw_sob->hdev = hdev;
+               hw_sob->sob_id = hw_queue->base_sob_id + sob;
+               hw_sob->q_idx = q_idx;
+               kref_init(&hw_sob->kref);
+       }
+}
+
+/*
+ * sync_stream_queue_reset - restore a queue's sync-stream state to its
+ * initial values (first reserved SOB, next signal value of 1).
+ */
+static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+       struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+       /*
+        * In case we got here due to a stuck CS, the refcnt might be bigger
+        * than 1 and therefore we reset it.
+        */
+       kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+       hw_queue->curr_sob_offset = 0;
+       hw_queue->next_sob_val = 1;
+}
+
 /*
  * queue_init - main initialization function for H/W queue object
  *
@@ -774,6 +807,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
                break;
        }
 
        if (rc)
                return rc;
 
+       if (q->supports_sync_stream)
+               sync_stream_queue_init(hdev, q->hw_queue_id);
+
@@ -848,6 +884,8 @@ int hl_hw_queues_create(struct hl_device *hdev)
                        i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
 
                q->queue_type = asic->hw_queues_props[i].type;
+               q->supports_sync_stream =
+                               asic->hw_queues_props[i].supports_sync_stream;
                rc = queue_init(hdev, q, i);
                if (rc) {
                        dev_err(hdev->dev,
@@ -889,7 +927,7 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
                        continue;
                q->pi = q->ci = 0;
 
-               if (q->queue_type == QUEUE_TYPE_EXT)
-                       hdev->asic_funcs->ext_queue_reset(hdev, q->hw_queue_id);
+               if (q->supports_sync_stream)
+                       sync_stream_queue_reset(hdev, q->hw_queue_id);
        }
 }