habanalabs: Timestamps buffers registration
author farah kassabri <fkassabri@habana.ai>
Thu, 23 Dec 2021 11:24:34 +0000 (13:24 +0200)
committer Oded Gabbay <ogabbay@kernel.org>
Mon, 28 Feb 2022 12:22:04 +0000 (14:22 +0200)
The timestamp registration API allows the user to register
a timestamp record event, which makes the driver take a
timestamp when the CQ counter reaches the target value
and write it to a location specified
by the user.
This is a non-blocking API, unlike wait_for_interrupt,
which is a blocking one.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/irq.c
drivers/misc/habanalabs/common/memory.c
include/uapi/misc/habanalabs.h

index ba5215b778528f8f191274a3c281be3528e8e9d5..c7757c78d0b150dcb5e19c41e04de49cbe87bbbb 100644 (file)
@@ -14,6 +14,8 @@
 #define HL_CS_FLAGS_TYPE_MASK  (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
                                HL_CS_FLAGS_COLLECTIVE_WAIT)
 
+#define MAX_TS_ITER_NUM 10
+
 /**
  * enum hl_cs_wait_status - cs wait status
  * @CS_WAIT_STATUS_BUSY: cs was not completed yet
@@ -924,7 +926,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
        int i;
        struct hl_cs *cs, *tmp;
 
-       flush_workqueue(hdev->sob_reset_wq);
+       flush_workqueue(hdev->ts_free_obj_wq);
 
        /* flush all completions before iterating over the CS mirror list in
         * order to avoid a race with the release functions
@@ -948,13 +950,19 @@ void hl_cs_rollback_all(struct hl_device *hdev)
 static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
-       struct hl_user_pending_interrupt *pend;
+       struct hl_user_pending_interrupt *pend, *temp;
        unsigned long flags;
 
        spin_lock_irqsave(&interrupt->wait_list_lock, flags);
-       list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
-               pend->fence.error = -EIO;
-               complete_all(&pend->fence.completion);
+       list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
+               if (pend->ts_reg_info.ts_buff) {
+                       list_del(&pend->wait_list_node);
+                       hl_ts_put(pend->ts_reg_info.ts_buff);
+                       hl_cb_put(pend->ts_reg_info.cq_cb);
+               } else {
+                       pend->fence.error = -EIO;
+                       complete_all(&pend->fence.completion);
+               }
        }
        spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 }
@@ -2857,43 +2865,133 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        return 0;
 }
 
+static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
+                                       struct hl_cb *cq_cb,
+                                       u64 ts_offset, u64 cq_offset, u64 target_value,
+                                       spinlock_t *wait_list_lock,
+                                       struct hl_user_pending_interrupt **pend)
+{
+       struct hl_user_pending_interrupt *requested_offset_record =
+                               (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+                               ts_offset;
+       struct hl_user_pending_interrupt *cb_last =
+                       (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+                       (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
+       unsigned long flags, iter_counter = 0;
+       u64 current_cq_counter;
+
+       /* Validate ts_offset not exceeding last max */
+       if (requested_offset_record > cb_last) {
+               dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
+                                                               (u64)(uintptr_t)cb_last);
+               return -EINVAL;
+       }
+
+start_over:
+       spin_lock_irqsave(wait_list_lock, flags);
+
+       /* Unregister only if we didn't reach the target value
+        * since in this case there will be no handling in irq context
+        * and then it's safe to delete the node out of the interrupt list
+        * then re-use it on other interrupt
+        */
+       if (requested_offset_record->ts_reg_info.in_use) {
+               current_cq_counter = *requested_offset_record->cq_kernel_addr;
+               if (current_cq_counter < requested_offset_record->cq_target_value) {
+                       list_del(&requested_offset_record->wait_list_node);
+                       spin_unlock_irqrestore(wait_list_lock, flags);
+
+                       hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
+                       hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
+
+                       dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
+               } else {
+                       dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");
+
+                       /* irq handling is in progress, give it time to finish */
+                       spin_unlock_irqrestore(wait_list_lock, flags);
+                       usleep_range(1, 10);
+                       if (++iter_counter == MAX_TS_ITER_NUM) {
+                               dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
+                               return -EINVAL;
+                       }
+
+                       goto start_over;
+               }
+       } else {
+               spin_unlock_irqrestore(wait_list_lock, flags);
+       }
+
+       /* Fill up the new registration node info */
+       requested_offset_record->ts_reg_info.in_use = 1;
+       requested_offset_record->ts_reg_info.ts_buff = ts_buff;
+       requested_offset_record->ts_reg_info.cq_cb = cq_cb;
+       requested_offset_record->ts_reg_info.timestamp_kernel_addr =
+                       (u64 *) ts_buff->user_buff_address + ts_offset;
+       requested_offset_record->cq_kernel_addr =
+                       (u64 *) cq_cb->kernel_address + cq_offset;
+       requested_offset_record->cq_target_value = target_value;
+
+       *pend = requested_offset_record;
+
+       dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
+                                               (u64)(uintptr_t)requested_offset_record);
+       return 0;
+}
+
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-                               struct hl_cb_mgr *cb_mgr, u64 timeout_us,
-                               u64 cq_counters_handle, u64 cq_counters_offset,
+                               struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
+                               u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
                                u64 target_value, struct hl_user_interrupt *interrupt,
+                               bool register_ts_record, u64 ts_handle, u64 ts_offset,
                                u32 *status, u64 *timestamp)
 {
+       u32 cq_patched_handle, ts_patched_handle;
        struct hl_user_pending_interrupt *pend;
+       struct hl_ts_buff *ts_buff;
+       struct hl_cb *cq_cb;
        unsigned long timeout, flags;
        long completion_rc;
-       struct hl_cb *cb;
        int rc = 0;
-       u32 handle;
 
        timeout = hl_usecs64_to_jiffies(timeout_us);
 
        hl_ctx_get(hdev, ctx);
 
-       cq_counters_handle >>= PAGE_SHIFT;
-       handle = (u32) cq_counters_handle;
-
-       cb = hl_cb_get(hdev, cb_mgr, handle);
-       if (!cb) {
-               hl_ctx_put(ctx);
-               return -EINVAL;
+       cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
+       cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
+       if (!cq_cb) {
+               rc = -EINVAL;
+               goto put_ctx;
        }
 
-       pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-       if (!pend) {
-               hl_cb_put(cb);
-               hl_ctx_put(ctx);
-               return -ENOMEM;
-       }
+       if (register_ts_record) {
+               dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
+                                       interrupt->interrupt_id, ts_offset, cq_counters_offset);
 
-       hl_fence_init(&pend->fence, ULONG_MAX);
+               ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
+               ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
+               if (!ts_buff) {
+                       rc = -EINVAL;
+                       goto put_cq_cb;
+               }
 
-       pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
-       pend->cq_target_value = target_value;
+               /* Find first available record */
+               rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
+                                               cq_counters_offset, target_value,
+                                               &interrupt->wait_list_lock, &pend);
+               if (rc)
+                       goto put_ts_buff;
+       } else {
+               pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+               if (!pend) {
+                       rc = -ENOMEM;
+                       goto put_cq_cb;
+               }
+               hl_fence_init(&pend->fence, ULONG_MAX);
+               pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
+               pend->cq_target_value = target_value;
+       }
 
        spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 
@@ -2901,13 +2999,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
         * before we added the node to the wait list
         */
        if (*pend->cq_kernel_addr >= target_value) {
+               if (register_ts_record)
+                       pend->ts_reg_info.in_use = 0;
                spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
                *status = HL_WAIT_CS_STATUS_COMPLETED;
-               /* There was no interrupt, we assume the completion is now. */
-               pend->fence.timestamp = ktime_get();
-               goto set_timestamp;
 
+               if (register_ts_record) {
+                       *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+                       goto put_ts_buff;
+               } else {
+                       pend->fence.timestamp = ktime_get();
+                       goto set_timestamp;
+               }
        } else if (!timeout_us) {
                spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
                *status = HL_WAIT_CS_STATUS_BUSY;
@@ -2916,11 +3020,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
        }
 
        /* Add pending user interrupt to relevant list for the interrupt
-        * handler to monitor
+        * handler to monitor.
+        * Note that we cannot have sorted list by target value,
+        * in order to shorten the list pass loop, since
+        * same list could have nodes for different cq counter handle.
         */
        list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
        spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
+       if (register_ts_record) {
+               rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+               goto ts_registration_exit;
+       }
+
        /* Wait for interrupt handler to signal completion */
        completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
                                                                timeout);
@@ -2952,15 +3064,30 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
                }
        }
 
+       /*
+        * For the wait (non-registration) flow, the node is removed from the
+        * list here and not in the irq handler, which also covers the
+        * completion timeout case. For a ts record registration, the node is
+        * deleted in the irq handler after the target value is reached.
+        */
        spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_del(&pend->wait_list_node);
        spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 set_timestamp:
        *timestamp = ktime_to_ns(pend->fence.timestamp);
-
        kfree(pend);
-       hl_cb_put(cb);
+       hl_cb_put(cq_cb);
+ts_registration_exit:
+       hl_ctx_put(ctx);
+
+       return rc;
+
+put_ts_buff:
+       hl_ts_put(ts_buff);
+put_cq_cb:
+       hl_cb_put(cq_cb);
+put_ctx:
        hl_ctx_put(ctx);
 
        return rc;
@@ -3119,11 +3246,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
                interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
 
        if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-               rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
+               rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
                                args->in.interrupt_timeout_us, args->in.cq_counters_handle,
                                args->in.cq_counters_offset,
-                               args->in.target, interrupt, &status,
-                               &timestamp);
+                               args->in.target, interrupt,
+                               !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
+                               args->in.timestamp_handle, args->in.timestamp_offset,
+                               &status, &timestamp);
        else
                rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
                                args->in.interrupt_timeout_us, args->in.addr,
index 727315b36b8b9eae99de50d7248103cae0d823ca..60bfd737404b394b839e243e58d43a755ff67527 100644 (file)
@@ -145,6 +145,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
        hl_release_pending_user_interrupts(hpriv->hdev);
 
        hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+       hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
        hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
        if (!hl_hpriv_put(hpriv))
@@ -209,6 +210,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 
        case HL_MMAP_TYPE_BLOCK:
                return hl_hw_block_mmap(hpriv, vma);
+
+       case HL_MMAP_TYPE_TS_BUFF:
+               return hl_ts_mmap(hpriv, vma);
        }
 
        return -EINVAL;
@@ -410,10 +414,10 @@ static int device_early_init(struct hl_device *hdev)
                goto free_cq_wq;
        }
 
-       hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
-       if (!hdev->sob_reset_wq) {
+       hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
+       if (!hdev->ts_free_obj_wq) {
                dev_err(hdev->dev,
-                       "Failed to allocate SOB reset workqueue\n");
+                       "Failed to allocate Timestamp registration free workqueue\n");
                rc = -ENOMEM;
                goto free_eq_wq;
        }
@@ -422,7 +426,7 @@ static int device_early_init(struct hl_device *hdev)
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
-               goto free_sob_reset_wq;
+               goto free_ts_free_wq;
        }
 
        rc = hl_mmu_if_set_funcs(hdev);
@@ -461,8 +465,8 @@ free_cb_mgr:
        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 free_chip_info:
        kfree(hdev->hl_chip_info);
-free_sob_reset_wq:
-       destroy_workqueue(hdev->sob_reset_wq);
+free_ts_free_wq:
+       destroy_workqueue(hdev->ts_free_obj_wq);
 free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -501,7 +505,7 @@ static void device_early_fini(struct hl_device *hdev)
 
        kfree(hdev->hl_chip_info);
 
-       destroy_workqueue(hdev->sob_reset_wq);
+       destroy_workqueue(hdev->ts_free_obj_wq);
        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->device_reset_work.wq);
 
index 995167020c2763d925b1b965db675f46d6268824..b06e2b0812b6858655365b1b7ee9713b337fbc7c 100644 (file)
 #define HL_NAME                                "habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:61] - Encode mmap type
+ * bits[63:59] - Encode mmap type
  * bits[45:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT             (61 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK              (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT             (59 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK              (0x1full << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_TS_BUFF           (0x10ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_BLOCK             (0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB                        (0x2ull << HL_MMAP_TYPE_SHIFT)
 
@@ -709,6 +710,40 @@ struct hl_cb_mgr {
        struct idr              cb_handles; /* protected by cb_lock */
 };
 
+/**
+ * struct hl_ts_mgr - describes the timestamp registration memory manager.
+ * @ts_lock: protects ts_handles.
+ * @ts_handles: an idr to hold all ts buffer handles.
+ */
+struct hl_ts_mgr {
+       spinlock_t              ts_lock;
+       struct idr              ts_handles;
+};
+
+/**
+ * struct hl_ts_buff - describes a timestamp buffer.
+ * @refcount: reference counter for usage of the buffer.
+ * @hdev: pointer to device this buffer belongs to.
+ * @mmap: true if the buff is currently mapped to user.
+ * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
+ * @user_buff_address: Holds the user buffer's kernel virtual address.
+ * @id: the buffer ID.
+ * @mmap_size: Holds the buffer size that was mmaped.
+ * @kernel_buff_size: Holds the internal kernel buffer size.
+ * @user_buff_size: Holds the user buffer size.
+ */
+struct hl_ts_buff {
+       struct kref             refcount;
+       struct hl_device        *hdev;
+       atomic_t                mmap;
+       void                    *kernel_buff_address;
+       void                    *user_buff_address;
+       u32                     id;
+       u32                     mmap_size;
+       u32                     kernel_buff_size;
+       u32                     user_buff_size;
+};
+
 /**
  * struct hl_cb - describes a Command Buffer.
  * @refcount: reference counter for usage of the CB.
@@ -886,9 +921,54 @@ struct hl_user_interrupt {
        u32                     interrupt_id;
 };
 
+/**
+ * struct timestamp_reg_free_node - holds the timestamp registration free objects node
+ * @free_objects_node: node in the list free_obj_jobs
+ * @cq_cb: pointer to cq command buffer to be freed
+ * @ts_buff: pointer to timestamp buffer to be freed
+ */
+struct timestamp_reg_free_node {
+       struct list_head        free_objects_node;
+       struct hl_cb            *cq_cb;
+       struct hl_ts_buff       *ts_buff;
+};
+
+/* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
+ * the job will be to pass over the free_obj_jobs list and put refcount to objects
+ * in each node of the list
+ * @free_obj: workqueue object to free timestamp registration node objects
+ * @hdev: pointer to the device structure
+ * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node)
+ */
+struct timestamp_reg_work_obj {
+       struct work_struct      free_obj;
+       struct hl_device        *hdev;
+       struct list_head        *free_obj_head;
+};
+
+/* struct timestamp_reg_info - holds the timestamp registration related data.
+ * @ts_buff: pointer to the timestamp buffer which include both user/kernel buffers.
+ *           relevant only when doing timestamps records registration.
+ * @cq_cb: pointer to CQ counter CB.
+ * @timestamp_kernel_addr: timestamp handle address, where to set timestamp
+ *                         relevant only when doing timestamps records
+ *                         registration.
+ * @in_use: indicates if the node is already in use. Relevant only when doing
+ *          timestamp records registration, since in this case the driver
+ *          has its own buffer which serves as a records pool instead of
+ *          allocating records dynamically.
+ */
+struct timestamp_reg_info {
+       struct hl_ts_buff       *ts_buff;
+       struct hl_cb            *cq_cb;
+       u64                     *timestamp_kernel_addr;
+       u8                      in_use;
+};
+
 /**
  * struct hl_user_pending_interrupt - holds a context to a user thread
  *                                    pending on an interrupt
+ * @ts_reg_info: holds the timestamps registration nodes info
  * @wait_list_node: node in the list of user threads pending on an interrupt
  * @fence: hl fence object for interrupt completion
  * @cq_target_value: CQ target value
@@ -896,10 +976,11 @@ struct hl_user_interrupt {
  *                  handler for taget value comparison
  */
 struct hl_user_pending_interrupt {
-       struct list_head        wait_list_node;
-       struct hl_fence         fence;
-       u64                     cq_target_value;
-       u64                     *cq_kernel_addr;
+       struct timestamp_reg_info       ts_reg_info;
+       struct list_head                wait_list_node;
+       struct hl_fence                 fence;
+       u64                             cq_target_value;
+       u64                             *cq_kernel_addr;
 };
 
 /**
@@ -1833,6 +1914,7 @@ struct hl_debug_params {
  * @ctx: current executing context. TODO: remove for multiple ctx per process
  * @ctx_mgr: context manager to handle multiple context for this FD.
  * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
  * @debugfs_list: list of relevant ASIC debugfs.
  * @dev_node: node in the device list of file private data
  * @refcount: number of related contexts.
@@ -1845,6 +1927,7 @@ struct hl_fpriv {
        struct hl_ctx           *ctx;
        struct hl_ctx_mgr       ctx_mgr;
        struct hl_cb_mgr        cb_mgr;
+       struct hl_ts_mgr        ts_mem_mgr;
        struct list_head        debugfs_list;
        struct list_head        dev_node;
        struct kref             refcount;
@@ -2517,7 +2600,7 @@ struct hl_reset_info {
  * @cq_wq: work queues of completion queues for executing work in process
  *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
- * @sob_reset_wq: work queue for sob reset executions.
+ * @ts_free_obj_wq: work queue for timestamp registration objects release.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
@@ -2645,7 +2728,7 @@ struct hl_device {
        struct hl_user_interrupt        common_user_interrupt;
        struct workqueue_struct         **cq_wq;
        struct workqueue_struct         *eq_wq;
-       struct workqueue_struct         *sob_reset_wq;
+       struct workqueue_struct         *ts_free_obj_wq;
        struct hl_ctx                   *kernel_ctx;
        struct hl_hw_queue              *kernel_queues;
        struct list_head                cs_mirror_list;
@@ -3128,6 +3211,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
                                        const char *format, ...);
 char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
 const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
+void hl_ts_put(struct hl_ts_buff *buff);
 
 #ifdef CONFIG_DEBUG_FS
 
index 017bfc4551da4d287f9f63c1c8541c2358426a5d..ca404ed9d9a737538c225b3b727619e6287c2d3d 100644 (file)
@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
        hl_cb_mgr_init(&hpriv->cb_mgr);
        hl_ctx_mgr_init(&hpriv->ctx_mgr);
+       hl_ts_mgr_init(&hpriv->ts_mem_mgr);
 
        hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
 
@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 out_err:
        mutex_unlock(&hdev->fpriv_list_lock);
        hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+       hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
        hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
        filp->private_data = NULL;
        mutex_destroy(&hpriv->restore_phase_mutex);
index 1b6bdc900c26d3e863ffa1aa03e3aa977c68de0a..c28f3a1c7a0370ca18b2cd0c1a9c9ac39606d28d 100644 (file)
@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
        return IRQ_HANDLED;
 }
 
+/*
+ * hl_ts_free_objects - handler of the free objects workqueue.
+ * This function puts the references to the objects (ts_buff and cq_cb) that
+ * the registration node took a reference to.
+ * @work: workqueue object pointer
+ */
+static void hl_ts_free_objects(struct work_struct *work)
+{
+       struct timestamp_reg_work_obj *job =
+                       container_of(work, struct timestamp_reg_work_obj, free_obj);
+       struct timestamp_reg_free_node *free_obj, *temp_free_obj;
+       struct list_head *free_list_head = job->free_obj_head;
+       struct hl_device *hdev = job->hdev;
+
+       list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
+               dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
+                                       free_obj->ts_buff,
+                                       free_obj->cq_cb);
+
+               hl_ts_put(free_obj->ts_buff);
+               hl_cb_put(free_obj->cq_cb);
+               kfree(free_obj);
+       }
+
+       kfree(free_list_head);
+       kfree(job);
+}
+
+/*
+ * This function is called with the wait_list_lock spinlock taken.
+ * It sets the timestamp and deletes the registration node from the
+ * interrupt wait list.
+ * Since we hold a spinlock here, we cannot put the refcount of the
+ * objects directly: the release function may be called, and it is long
+ * logic (which might also sleep) that cannot be handled in irq context.
+ * So here we fill a list with nodes of "put" jobs, and this list is then
+ * sent to a dedicated workqueue to do the actual put.
+ */
+int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
+                                               struct list_head **free_list)
+{
+       struct timestamp_reg_free_node *free_node;
+       u64 timestamp;
+
+       if (!(*free_list)) {
+               /* Alloc/Init the timestamp registration free objects list */
+               *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
+               if (!(*free_list))
+                       return -ENOMEM;
+
+               INIT_LIST_HEAD(*free_list);
+       }
+
+       free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
+       if (!free_node)
+               return -ENOMEM;
+
+       timestamp = ktime_get_ns();
+
+       *pend->ts_reg_info.timestamp_kernel_addr = timestamp;
+
+       dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
+                       pend->ts_reg_info.timestamp_kernel_addr,
+                       *(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
+
+       list_del(&pend->wait_list_node);
+
+       /* Mark kernel CB node as free */
+       pend->ts_reg_info.in_use = 0;
+
+       /* Putting the refcount for ts_buff and cq_cb objects will be handled
+        * in workqueue context, just add job to free_list.
+        */
+       free_node->ts_buff = pend->ts_reg_info.ts_buff;
+       free_node->cq_cb = pend->ts_reg_info.cq_cb;
+       list_add(&free_node->free_objects_node, *free_list);
+
+       return 0;
+}
+
 static void handle_user_cq(struct hl_device *hdev,
                        struct hl_user_interrupt *user_cq)
 {
-       struct hl_user_pending_interrupt *pend;
+       struct hl_user_pending_interrupt *pend, *temp_pend;
+       struct list_head *ts_reg_free_list_head = NULL;
+       struct timestamp_reg_work_obj *job;
+       bool reg_node_handle_fail = false;
        ktime_t now = ktime_get();
+       int rc;
+
+       /* For registration nodes:
+        * As part of handling the registration nodes, we should put refcount to
+        * some objects. the problem is that we cannot do that under spinlock
+        * or in irq handler context at all (since release functions are long and
+        * might sleep), so we will need to handle that part in workqueue context.
+        * To avoid handling kmalloc failure which compels us rolling back actions
+        * and move nodes hanged on the free list back to the interrupt wait list
+        * we always alloc the job of the WQ at the beginning.
+        */
+       job = kmalloc(sizeof(*job), GFP_ATOMIC);
+       if (!job)
+               return;
 
        spin_lock(&user_cq->wait_list_lock);
-       list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
-               if ((pend->cq_kernel_addr &&
-                               *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
+       list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
+               if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
                                !pend->cq_kernel_addr) {
-                       pend->fence.timestamp = now;
-                       complete_all(&pend->fence.completion);
+                       if (pend->ts_reg_info.ts_buff) {
+                               if (!reg_node_handle_fail) {
+                                       rc = handle_registration_node(hdev, pend,
+                                                                       &ts_reg_free_list_head);
+                                       if (rc)
+                                               reg_node_handle_fail = true;
+                               }
+                       } else {
+                               /* Handle wait target value node */
+                               pend->fence.timestamp = now;
+                               complete_all(&pend->fence.completion);
+                       }
                }
        }
        spin_unlock(&user_cq->wait_list_lock);
+
+       if (ts_reg_free_list_head) {
+               INIT_WORK(&job->free_obj, hl_ts_free_objects);
+               job->free_obj_head = ts_reg_free_list_head;
+               job->hdev = hdev;
+               queue_work(hdev->ts_free_obj_wq, &job->free_obj);
+       } else {
+               kfree(job);
+       }
 }
 
 /**
index f60a9638d9eac825455e2850795be7a35f39c89c..c0904081f37e7ff99bf50dded5a76fde3599a787 100644 (file)
@@ -20,6 +20,9 @@ MODULE_IMPORT_NS(DMA_BUF);
 /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
 #define DRAM_POOL_PAGE_SIZE SZ_8M
 
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
+                       struct hl_mem_in *args, u64 *handle);
+
 /*
  * The va ranges in context object contain a list with the available chunks of
  * device virtual memory.
@@ -2021,6 +2024,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
                rc = -EPERM;
                break;
 
+       case HL_MEM_OP_TS_ALLOC:
+               rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+               break;
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -EINVAL;
@@ -2031,6 +2037,258 @@ out:
        return rc;
 }
 
+/*
+ * kref release callback for a timestamps buffer object: frees the internal
+ * kernel buffer, the user buffer and finally the descriptor itself.
+ */
+static void ts_buff_release(struct kref *ref)
+{
+       struct hl_ts_buff *ts_buff = container_of(ref, struct hl_ts_buff, refcount);
+
+       vfree(ts_buff->kernel_buff_address);
+       vfree(ts_buff->user_buff_address);
+       kfree(ts_buff);
+}
+
+/**
+ * hl_ts_get() - look up a timestamps buffer by handle and take a reference
+ * @hdev: habanalabs device, used here only for logging
+ * @mgr: timestamps buffers manager that owns the handles IDR
+ * @handle: IDR handle of the requested buffer
+ *
+ * The lookup and the refcount increment are both done under mgr->ts_lock.
+ *
+ * Return: the buffer with its refcount raised, or NULL (after logging a
+ * warning) when the handle is not found in the IDR.
+ */
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
+                                       u32 handle)
+{
+       struct hl_ts_buff *found;
+
+       spin_lock(&mgr->ts_lock);
+       found = idr_find(&mgr->ts_handles, handle);
+       if (found)
+               kref_get(&found->refcount);
+       spin_unlock(&mgr->ts_lock);
+
+       /* Report the miss only after the lock was released */
+       if (!found)
+               dev_warn(hdev->dev,
+                       "TS buff get failed, no match to handle 0x%x\n", handle);
+
+       return found;
+}
+
+/**
+ * hl_ts_put() - drop a reference on a timestamps buffer
+ * @buff: the timestamps buffer object
+ *
+ * When the last reference is dropped, ts_buff_release() frees the object.
+ */
+void hl_ts_put(struct hl_ts_buff *buff)
+{
+       kref_put(&buff->refcount, ts_buff_release);
+}
+
+/*
+ * VMA close handler of a mapped timestamps buffer.
+ * Subtracts the size of the closed VMA from the tracked mapped size. Once
+ * nothing is left mapped, the buffer is marked as not mmaped, one reference
+ * on it is dropped and the VMA private data is cleared.
+ */
+static void buff_vm_close(struct vm_area_struct *vma)
+{
+       struct hl_ts_buff *ts_buff = vma->vm_private_data;
+       long remaining;
+
+       remaining = ts_buff->mmap_size - (vma->vm_end - vma->vm_start);
+
+       if (remaining <= 0) {
+               atomic_set(&ts_buff->mmap, 0);
+               hl_ts_put(ts_buff);
+               vma->vm_private_data = NULL;
+       } else {
+               /* Part of the buffer is still mapped */
+               ts_buff->mmap_size = remaining;
+       }
+}
+
+/* VMA operations installed by hl_ts_mmap() on timestamps buffer mappings */
+static const struct vm_operations_struct ts_buff_vm_ops = {
+       .close = buff_vm_close,
+};
+
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ts_buff *buff;
+       u32 handle, user_buff_size;
+       int rc;
+
+       /* We use the page offset to hold the idr and thus we need to clear
+        * it before doing the mmap itself
+        */
+       handle = vma->vm_pgoff;
+       vma->vm_pgoff = 0;
+
+       buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
+       if (!buff) {
+               dev_err(hdev->dev,
+                       "TS buff mmap failed, no match to handle 0x%x\n", handle);
+               return -EINVAL;
+       }
+
+       /* Validation check */
+       user_buff_size = vma->vm_end - vma->vm_start;
+       if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
+               dev_err(hdev->dev,
+                       "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
+                       user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
+               rc = -EINVAL;
+               goto put_buff;
+       }
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+       if (!access_ok(VERIFY_WRITE,
+               (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
+#else
+       if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
+                                               user_buff_size)) {
+#endif
+               dev_err(hdev->dev,
+                       "user pointer is invalid - 0x%lx\n",
+                       vma->vm_start);
+
+               rc = -EINVAL;
+               goto put_buff;
+       }
+
+       if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
+               dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
+               rc = -EINVAL;
+               goto put_buff;
+       }
+
+       vma->vm_ops = &ts_buff_vm_ops;
+       vma->vm_private_data = buff;
+       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
+       rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
+       if (rc) {
+               atomic_set(&buff->mmap, 0);
+               goto put_buff;
+       }
+
+       buff->mmap_size = buff->user_buff_size;
+       vma->vm_pgoff = handle;
+
+       return 0;
+
+put_buff:
+       hl_ts_put(buff);
+       return rc;
+}
+
+/**
+ * hl_ts_mgr_init() - initialize a timestamps buffers manager
+ * @mgr: the manager to initialize
+ *
+ * Sets up the handles IDR and the spinlock that protects it.
+ */
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
+{
+       idr_init(&mgr->ts_handles);
+       spin_lock_init(&mgr->ts_lock);
+}
+
+/**
+ * hl_ts_mgr_fini() - tear down a timestamps buffers manager
+ * @hdev: habanalabs device, used here only for logging
+ * @mgr: the manager to tear down
+ *
+ * Drops one reference on every buffer still registered in the IDR and logs
+ * an error for each buffer that this put did not release, then destroys
+ * the IDR.
+ */
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
+{
+       struct hl_ts_buff *ts_buff;
+       u32 id;
+
+       idr_for_each_entry(&mgr->ts_handles, ts_buff, id) {
+               /* kref_put() returns 1 only when the object was released */
+               if (kref_put(&ts_buff->refcount, ts_buff_release) == 1)
+                       continue;
+
+               dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
+                                               id);
+       }
+
+       idr_destroy(&mgr->ts_handles);
+}
+
+/*
+ * Allocate a timestamps buffer object holding num_elements entries.
+ * The object owns two buffers: a user buffer of u64 entries, allocated with
+ * vmalloc_user(), and an internal kernel buffer of
+ * struct hl_user_pending_interrupt entries.
+ * Returns the new object, or NULL if any of the allocations failed.
+ *
+ * NOTE(review): the kernel buffer comes from vmalloc() and is not zeroed
+ * here - confirm that every node is initialized before its fields are read.
+ */
+static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+{
+       struct hl_ts_buff *new_buff;
+       u32 user_size, kernel_size;
+
+       new_buff = kzalloc(sizeof(*new_buff), GFP_KERNEL);
+       if (!new_buff)
+               return NULL;
+
+       /* The buffer that holds the timestamps themselves */
+       user_size = num_elements * sizeof(u64);
+       new_buff->user_buff_address = vmalloc_user(user_size);
+       if (!new_buff->user_buff_address)
+               goto free_desc;
+
+       new_buff->user_buff_size = user_size;
+
+       /* The driver-internal pool of registration nodes */
+       kernel_size = num_elements * sizeof(struct hl_user_pending_interrupt);
+       new_buff->kernel_buff_address = vmalloc(kernel_size);
+       if (!new_buff->kernel_buff_address)
+               goto free_user_buff;
+
+       new_buff->kernel_buff_size = kernel_size;
+
+       return new_buff;
+
+free_user_buff:
+       vfree(new_buff->user_buff_address);
+free_desc:
+       kfree(new_buff);
+       return NULL;
+}
+
+/**
+ * allocate_timestamps_buffers() - allocate timestamps buffers
+ * @hpriv: pointer to the private data of the fd
+ * @args: ioctl input
+ * @handle: user timestamp buffer handle as an output
+ *
+ * This function will allocate a ts buffer that will later on be mapped to the
+ * user in order to be able to read the timestamp.
+ * In addition it'll allocate an extra buffer for registration management:
+ * since we cannot fail during registration for out-of-memory situation, we
+ * prepare a pool which will be used as user interrupt nodes, and instead of
+ * dynamically allocating nodes during registration we'll pick the node from
+ * this pool.
+ * The buffer object is registered in the handles IDR of the fd, and the IDR id
+ * is encoded into the handle returned to the user.
+ *
+ * Return: 0 on success, -EINVAL if the requested number of elements exceeds
+ * TS_MAX_ELEMENTS_NUM, -ENOMEM if the buffers could not be allocated, or the
+ * idr_alloc() error code. On the -ENOMEM and idr_alloc() failures *handle is
+ * set to 0.
+ */
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
+{
+       struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ts_buff *ts_buff;
+       int rc = 0;
+
+       if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
+               dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+                               args->num_of_elements, TS_MAX_ELEMENTS_NUM);
+               return -EINVAL;
+       }
+
+       /* Allocate ts buffer object
+        * This object will contain two buffers one that will be mapped to the user
+        * and another internal buffer for the driver use only, which won't be mapped
+        * to the user.
+        */
+       ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
+       if (!ts_buff) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+
+       /* The refcount must be valid before the object becomes visible in the
+        * IDR: hl_ts_get() may take a reference as soon as the handle is
+        * published, and the error path below relies on kref_put() to actually
+        * free the buffers.
+        */
+       kref_init(&ts_buff->refcount);
+       ts_buff->hdev = hdev;
+
+       spin_lock(&ts_mgr->ts_lock);
+       rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
+       spin_unlock(&ts_mgr->ts_lock);
+       if (rc < 0) {
+               dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
+               goto release_ts_buff;
+       }
+
+       ts_buff->id = rc;
+
+       /* idr is 32-bit so we can safely OR it with a mask that is above 32 bit */
+       *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
+       *handle <<= PAGE_SHIFT;
+
+       dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+
+       return 0;
+
+release_ts_buff:
+       /* Drops the initial reference, which frees both buffers and the object */
+       kref_put(&ts_buff->refcount, ts_buff_release);
+out_err:
+       *handle = 0;
+       return rc;
+}
+
 int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 {
        enum hl_device_status status;
@@ -2146,6 +2404,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
                args->out.fd = dmabuf_fd;
                break;
 
+       case HL_MEM_OP_TS_ALLOC:
+               rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+               break;
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -EINVAL;
index 12976f7a8d841b0f7e95ecb46d9d884be9bac203..e21db03196ae3e399713a5b621f2c145fb0c197a 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2020 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -30,6 +30,9 @@
  */
 #define GAUDI_FIRST_AVAILABLE_W_S_MONITOR              72
 
+/* Max number of elements in timestamps registration buffers */
+#define        TS_MAX_ELEMENTS_NUM                             (1 << 20) /* 1M elements */
+
 /*
  * Goya queue Numbering
  *
@@ -695,10 +698,12 @@ struct hl_cb_in {
        __u64 cb_handle;
        /* HL_CB_OP_* */
        __u32 op;
+
        /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
         * will be allocated, regardless of this parameter's value, is PAGE_SIZE
         */
        __u32 cb_size;
+
        /* Context ID - Currently not in use */
        __u32 ctx_id;
        /* HL_CB_FLAGS_* */
@@ -964,6 +969,7 @@ union hl_cs_args {
 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK                0xFFF00000
 #define HL_WAIT_CS_FLAGS_MULTI_CS              0x4
 #define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ   0x10
+#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT    0x20
 
 #define HL_WAIT_MULTI_CS_LIST_MAX_LEN  32
 
@@ -1036,6 +1042,20 @@ struct hl_wait_cs_in {
         * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set
         */
        __u64 cq_counters_offset;
+
+       /*
+        * timestamp_handle is the handle of the timestamps buffer.
+        * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
+        */
+       __u64 timestamp_handle;
+
+       /*
+        * timestamp_offset is the offset inside the timestamps buffer pointed to by
+        * timestamp_handle above.
+        * upon interrupt, if the cq reached the target value then the driver will write
+        * the timestamp to this offset.
+        * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set
+        */
+       __u64 timestamp_offset;
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED    0
@@ -1082,6 +1102,14 @@ union hl_wait_cs_args {
  */
 #define HL_MEM_OP_EXPORT_DMABUF_FD     5
 
+/* Opcode to create timestamps pool for user interrupts registration support.
+ * The memory will be allocated by the kernel driver: a timestamp buffer, for
+ * which the user gets a handle to use with mmap, and another internal buffer
+ * used by the driver for registration management.
+ * The memory will be freed when the user closes the file descriptor (ctx close)
+ */
+#define HL_MEM_OP_TS_ALLOC             6
+
 /* Memory flags */
 #define HL_MEM_CONTIGUOUS      0x1
 #define HL_MEM_SHARED          0x2
@@ -1173,9 +1201,14 @@ struct hl_mem_in {
         * DMA-BUF file/FD flags.
         */
        __u32 flags;
+
        /* Context ID - Currently not in use */
        __u32 ctx_id;
-       __u32 pad;
+
+       /* number of timestamp elements
+        * used only when HL_MEM_OP_TS_ALLOC opcode
+        */
+       __u32 num_of_elements;
 };
 
 struct hl_mem_out {