habanalabs: add new mem ioctl op for mapping hw blocks
authorOfir Bitton <obitton@habana.ai>
Tue, 5 Jan 2021 10:55:06 +0000 (12:55 +0200)
committerOded Gabbay <ogabbay@kernel.org>
Wed, 27 Jan 2021 19:03:51 +0000 (21:03 +0200)
For future ASIC support the driver allows user to map certain regions
in the device's configuration space for direct access from userspace.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
include/uapi/misc/habanalabs.h

index 13405d47d8439f8d3824aca4617f31674c922bc9..59219c862ca095844711cc9b3b9b83fc0fdde2e3 100644 (file)
@@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
        switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
        case HL_MMAP_TYPE_CB:
                return hl_cb_mmap(hpriv, vma);
+
+       case HL_MMAP_TYPE_BLOCK:
+               return hl_hw_block_mmap(hpriv, vma);
        }
 
        return -EINVAL;
index 4129e4e6a7b50ef9b3e09ecf08eadd9209dea9b8..e105612ed57735cc866d24c8e259d8279947b5eb 100644 (file)
 #define HL_NAME                                "habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:62] - Encode mmap type
+ * bits[63:61] - Encode mmap type
  * bits[45:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT             (62 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK              (0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT             (61 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK              (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_BLOCK             (0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB                        (0x2ull << HL_MMAP_TYPE_SHIFT)
 
-#define HL_MMAP_OFFSET_VALUE_MASK      (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_MASK      (0x1FFFFFFFFFFFull >> PAGE_SHIFT)
 #define HL_MMAP_OFFSET_VALUE_GET(off)  (off & HL_MMAP_OFFSET_VALUE_MASK)
 
 #define HL_PENDING_RESET_PER_SEC       10
@@ -856,6 +857,8 @@ enum div_select_defs {
  * @descramble_addr: Routine to de-scramble the address prior of
  *                  showing it to users.
  * @ack_protection_bits_errors: ack and dump all security violations
+ * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ * @hw_block_mmap: mmap a HW block with a given id.
  */
 struct hl_asic_funcs {
        int (*early_init)(struct hl_device *hdev);
@@ -968,6 +971,10 @@ struct hl_asic_funcs {
        u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
        u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
        void (*ack_protection_bits_errors)(struct hl_device *hdev);
+       int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
+                       u32 *block_id);
+       int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+                       u32 block_id, u32 block_size);
 };
 
 
@@ -2167,6 +2174,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
                        bool map_cb, u64 *handle);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,        struct hl_cb_mgr *mgr,
                        u32 handle);
 void hl_cb_put(struct hl_cb *cb);
index 36de0d05d3a2d2a57be8b4ee0f264ae711e23967..7171e8820a2dc0134ef1007905eeae120e1dcebc 100644 (file)
@@ -1289,11 +1289,88 @@ vm_type_err:
        return rc;
 }
 
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle)
+{
+       u32 block_id = 0;
+       int rc;
+
+       rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id);
+
+       *handle = block_id | HL_MMAP_TYPE_BLOCK;
+       *handle <<= PAGE_SHIFT;
+
+       return rc;
+}
+
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+       struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+       hl_ctx_put(ctx);
+       vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+       .close = hw_block_vm_close
+};
+
+/**
+ * hl_hw_block_mmap() - mmap a hw block to user.
+ * @hpriv: pointer to the private data of the fd
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * Driver increments context reference for every HW block mapped in order
+ * to prevent user from closing FD without unmapping first
+ */
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       u32 block_id, block_size;
+       int rc;
+
+       /* We use the page offset to hold the block id and thus we need to clear
+        * it before doing the mmap itself
+        */
+       block_id = vma->vm_pgoff;
+       vma->vm_pgoff = 0;
+
+       /* Driver only allows mapping of a complete HW block */
+       block_size = vma->vm_end - vma->vm_start;
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+       if (!access_ok(VERIFY_WRITE,
+               (void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#else
+       if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#endif
+               dev_err(hdev->dev,
+                       "user pointer is invalid - 0x%lx\n",
+                       vma->vm_start);
+
+               return -EINVAL;
+       }
+
+       vma->vm_ops = &hw_block_vm_ops;
+       vma->vm_private_data = hpriv->ctx;
+
+       hl_ctx_get(hdev, hpriv->ctx);
+
+       rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
+       if (rc) {
+               hl_ctx_put(hpriv->ctx);
+               return rc;
+       }
+
+       vma->vm_pgoff = block_id;
+
+       return 0;
+}
+
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
        struct hl_device *hdev = hpriv->hdev;
        struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
+       u64 block_handle, device_addr = 0;
        u32 handle = 0;
        int rc;
 
@@ -1337,6 +1414,12 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
                rc = 0;
                break;
 
+       case HL_MEM_OP_MAP_BLOCK:
+               rc = map_block(hdev, args->in.map_block.block_addr,
+                                                       &block_handle);
+               args->out.handle = block_handle;
+               break;
+
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -ENOTTY;
@@ -1353,7 +1436,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
        union hl_mem_args *args = data;
        struct hl_device *hdev = hpriv->hdev;
        struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
+       u64 block_handle, device_addr = 0;
        u32 handle = 0;
        int rc;
 
@@ -1439,6 +1522,12 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
                rc = unmap_device_va(ctx, &args->in, false);
                break;
 
+       case HL_MEM_OP_MAP_BLOCK:
+               rc = map_block(hdev, args->in.map_block.block_addr,
+                                                       &block_handle);
+               args->out.handle = block_handle;
+               break;
+
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -ENOTTY;
index b49e10394ed433006a679cdf0b15e8f73679b683..e1c6072e5fb31cb1d98641aca52cc4fbbcafb25c 100644 (file)
@@ -8471,6 +8471,19 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
+static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+                                       u32 *block_id)
+{
+       return -EPERM;
+}
+
+static int gaudi_block_mmap(struct hl_device *hdev,
+                               struct vm_area_struct *vma,
+                               u32 block_id, u32 block_size)
+{
+       return -EPERM;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
        .early_init = gaudi_early_init,
        .early_fini = gaudi_early_fini,
@@ -8550,7 +8563,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
        .scramble_addr = hl_mmu_scramble_addr,
        .descramble_addr = hl_mmu_descramble_addr,
-       .ack_protection_bits_errors = gaudi_ack_protection_bits_errors
+       .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
+       .get_hw_block_id = gaudi_get_hw_block_id,
+       .hw_block_mmap = gaudi_block_mmap
 };
 
 /**
index db951d622ad5c5d6d81465b206e5e9f8c788ac33..6b4c41188495a2b2fdc75d9b4ac5a4562fdb2a7f 100644 (file)
@@ -5382,6 +5382,18 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
 
 }
 
+static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+                               u32 *block_id)
+{
+       return -EPERM;
+}
+
+static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+                               u32 block_id, u32 block_size)
+{
+       return -EPERM;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
        .early_init = goya_early_init,
        .early_fini = goya_early_fini,
@@ -5461,7 +5473,9 @@ static const struct hl_asic_funcs goya_funcs = {
        .collective_wait_create_jobs = goya_collective_wait_create_jobs,
        .scramble_addr = hl_mmu_scramble_addr,
        .descramble_addr = hl_mmu_descramble_addr,
-       .ack_protection_bits_errors = goya_ack_protection_bits_errors
+       .ack_protection_bits_errors = goya_ack_protection_bits_errors,
+       .get_hw_block_id = goya_get_hw_block_id,
+       .hw_block_mmap = goya_block_mmap
 };
 
 /*
index 866355a531882e8d225bd1e46947c01aa95fa237..b1c09eba8ac28d13192f0f89d42b0903a16e38c6 100644 (file)
@@ -718,6 +718,8 @@ union hl_wait_cs_args {
 #define HL_MEM_OP_MAP                  2
 /* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP                        3
+/* Opcode to map a hw block */
+#define HL_MEM_OP_MAP_BLOCK            4
 
 /* Memory flags */
 #define HL_MEM_CONTIGUOUS      0x1
@@ -772,6 +774,17 @@ struct hl_mem_in {
                        __u64 mem_size;
                } map_host;
 
+               /* HL_MEM_OP_MAP_BLOCK - map a hw block */
+               struct {
+                       /*
+                        * HW block address to map, a handle will be returned
+                        * to the user and will be used to mmap the relevant
+                        * block. Only addresses from configuration space are
+                        * allowed.
+                        */
+                       __u64 block_addr;
+               } map_block;
+
                /* HL_MEM_OP_UNMAP - unmap host memory */
                struct {
                        /* Virtual address returned from HL_MEM_OP_MAP */
@@ -798,8 +811,9 @@ struct hl_mem_out {
                __u64 device_virt_addr;
 
                /*
-                * Used for HL_MEM_OP_ALLOC. This is the assigned
-                * handle for the allocated memory
+                * Used for HL_MEM_OP_ALLOC and HL_MEM_OP_MAP_BLOCK.
+                * This is the assigned handle for the allocated memory
+                * or mapped block
                 */
                __u64 handle;
        };