habanalabs: use for_each_sgtable_dma_sg for dma sgt
author Ohad Sharabi <osharabi@habana.ai>
Thu, 24 Mar 2022 14:34:49 +0000 (16:34 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 22 May 2022 19:01:18 +0000 (21:01 +0200)
Instead of using for_each_sg when iterating over an sgt that contains DMA
entries, use the more appropriate for_each_sgtable_dma_sg macro.

In addition, both Goya and Gaudi have the exact same implementation
of the asic function that encapsulates the usage of this macro, so
it is better to move that implementation to the common code.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c

index 02b20a7b8119bf8df38ffe788d0ca94c9fced530..ffa613af6b0d8db2eae52d6d9e42ce6b70b16425 100644 (file)
@@ -370,8 +370,7 @@ static int userptr_lookup_show(struct seq_file *s, void *data)
                if (dev_entry->userptr_lookup >= userptr->addr &&
                dev_entry->userptr_lookup < userptr->addr + userptr->size) {
                        total_npages = 0;
-                       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
-                                       i) {
+                       for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
                                npages = hl_get_sg_info(sg, &dma_addr);
                                sg_start = userptr->addr +
                                        total_npages * PAGE_SIZE;
index 48654dfcd7b6cd8be54e67fa7d21bcae7e4c3115..9bca855b464900248215c1099f0c545bdc84ce2d 100644 (file)
@@ -80,6 +80,38 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
        return 0;
 }
 
+int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct scatterlist *sg;
+       int rc, i;
+
+       rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
+       if (rc)
+               return rc;
+
+       /* Shift to the device's base physical address of host memory if necessary */
+       if (prop->device_dma_offset_for_host_access)
+               for_each_sgtable_dma_sg(sgt, sg, i)
+                       sg->dma_address += prop->device_dma_offset_for_host_access;
+
+       return 0;
+}
+
+void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
+{
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct scatterlist *sg;
+       int i;
+
+       /* Cancel the device's base physical address of host memory if necessary */
+       if (prop->device_dma_offset_for_host_access)
+               for_each_sgtable_dma_sg(sgt, sg, i)
+                       sg->dma_address -= prop->device_dma_offset_for_host_access;
+
+       dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
+}
+
 /*
  * hl_access_cfg_region - access the config region
  *
index e9a64309e99f7b8398829e1a768062fb36d87cac..5f7e584d0f332f54f004e90a474e467577c6fecc 100644 (file)
@@ -1274,9 +1274,9 @@ struct fw_load_mgr {
  * @asic_dma_pool_free: free small DMA allocation from pool.
  * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
  * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
  * @cs_parser: parse Command Submission.
- * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @asic_dma_map_sgtable: DMA map scatter-gather table.
  * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
  * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
  * @update_eq_ci: update event queue CI.
@@ -1389,12 +1389,11 @@ struct hl_asic_funcs {
                                size_t size, dma_addr_t *dma_handle);
        void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
                                size_t size, void *vaddr);
-       void (*hl_dma_unmap_sg)(struct hl_device *hdev,
-                               struct scatterlist *sgl, int nents,
+       void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
+                               struct sg_table *sgt,
                                enum dma_data_direction dir);
        int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
-       int (*asic_dma_map_sg)(struct hl_device *hdev,
-                               struct scatterlist *sgl, int nents,
+       int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt,
                                enum dma_data_direction dir);
        u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
                                        struct sg_table *sgt);
@@ -3011,6 +3010,9 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 }
 
 uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
+int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
+void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
+                               enum dma_data_direction dir);
 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
        enum debugfs_access_type acc_type);
 int hl_access_dev_mem(struct hl_device *hdev, struct pci_mem_region *region,
index 57d42e30818a2343b9a8a6c46272bd8c597485ed..326c2179628f7cf4805ba0dbb88c8fb42a8f7ea7 100644 (file)
@@ -238,19 +238,18 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
                goto pin_err;
        }
 
-       rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
-                                       userptr->sgt->nents, DMA_BIDIRECTIONAL);
-       if (rc) {
-               dev_err(hdev->dev, "failed to map sgt with DMA region\n");
-               goto dma_map_err;
-       }
-
        userptr->dma_mapped = true;
        userptr->dir = DMA_BIDIRECTIONAL;
        userptr->vm_type = VM_TYPE_USERPTR;
 
        *p_userptr = userptr;
 
+       rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL);
+       if (rc) {
+               dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+               goto dma_map_err;
+       }
+
        return 0;
 
 dma_map_err:
@@ -901,7 +900,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
         * consecutive block.
         */
        total_npages = 0;
-       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+       for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
                npages = hl_get_sg_info(sg, &dma_addr);
 
                total_npages += npages;
@@ -930,7 +929,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
        phys_pg_pack->total_size = total_npages * page_size;
 
        j = 0;
-       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+       for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
                npages = hl_get_sg_info(sg, &dma_addr);
 
                /* align down to physical page size and save the offset */
@@ -2444,9 +2443,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
        hl_debugfs_remove_userptr(hdev, userptr);
 
        if (userptr->dma_mapped)
-               hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
-                                                       userptr->sgt->nents,
-                                                       userptr->dir);
+               hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir);
 
        unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true);
        kvfree(userptr->pages);
index 2824d2f16a255f658f141495a7c7cb9653d4b6ff..7828337eedcec7b85ea88a673282cc85bf2fcc8d 100644 (file)
@@ -5038,37 +5038,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
-                       int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
-               return -ENOMEM;
-
-       /* Shift to the device's base physical address of host memory */
-       for_each_sg(sgl, sg, nents, i)
-               sg->dma_address += HOST_PHYS_BASE;
-
-       return 0;
-}
-
-static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
-                       int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       /* Cancel the device's base physical address of host memory */
-       for_each_sg(sgl, sg, nents, i)
-               sg->dma_address -= HOST_PHYS_BASE;
-
-       dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
-}
-
-static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
-                                       struct sg_table *sgt)
+static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
 {
        struct scatterlist *sg, *sg_next_iter;
        u32 count, dma_desc_cnt;
@@ -5077,8 +5047,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
 
        dma_desc_cnt = 0;
 
-       for_each_sg(sgt->sgl, sg, sgt->nents, count) {
-
+       for_each_sgtable_dma_sg(sgt, sg, count) {
                len = sg_dma_len(sg);
                addr = sg_dma_address(sg);
 
@@ -5132,8 +5101,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
 
        list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-       rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
-                                       userptr->sgt->nents, dir);
+       rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
        if (rc) {
                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
                goto unpin_memory;
@@ -5408,7 +5376,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev,
        sgt = userptr->sgt;
        dma_desc_cnt = 0;
 
-       for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+       for_each_sgtable_dma_sg(sgt, sg, count) {
                len = sg_dma_len(sg);
                dma_addr = sg_dma_address(sg);
 
@@ -9261,9 +9229,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .asic_dma_pool_free = gaudi_dma_pool_free,
        .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
        .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
-       .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
+       .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
        .cs_parser = gaudi_cs_parser,
-       .asic_dma_map_sg = gaudi_dma_map_sg,
+       .asic_dma_map_sgtable = hl_dma_map_sgtable,
        .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
        .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
        .update_eq_ci = gaudi_update_eq_ci,
index 48235795a55d326603ca4739807d39241be96caf..75736ccdfe4b03b16e6869207c125eb2f39fca9d 100644 (file)
@@ -3311,35 +3311,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
        hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
 }
 
-static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
-                               int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
-               return -ENOMEM;
-
-       /* Shift to the device's base physical address of host memory */
-       for_each_sg(sgl, sg, nents, i)
-               sg->dma_address += HOST_PHYS_BASE;
-
-       return 0;
-}
-
-static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
-                               int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       /* Cancel the device's base physical address of host memory */
-       for_each_sg(sgl, sg, nents, i)
-               sg->dma_address -= HOST_PHYS_BASE;
-
-       dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
-}
-
 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
 {
        struct scatterlist *sg, *sg_next_iter;
@@ -3349,8 +3320,7 @@ u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
 
        dma_desc_cnt = 0;
 
-       for_each_sg(sgt->sgl, sg, sgt->nents, count) {
-
+       for_each_sgtable_dma_sg(sgt, sg, count) {
                len = sg_dma_len(sg);
                addr = sg_dma_address(sg);
 
@@ -3404,8 +3374,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
 
        list_add_tail(&userptr->job_node, parser->job_userptr_list);
 
-       rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
-                                       userptr->sgt->nents, dir);
+       rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
        if (rc) {
                dev_err(hdev->dev, "failed to map sgt with DMA region\n");
                goto unpin_memory;
@@ -3869,7 +3838,7 @@ static int goya_patch_dma_packet(struct hl_device *hdev,
        sgt = userptr->sgt;
        dma_desc_cnt = 0;
 
-       for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+       for_each_sgtable_dma_sg(sgt, sg, count) {
                len = sg_dma_len(sg);
                dma_addr = sg_dma_address(sg);
 
@@ -5497,9 +5466,9 @@ static const struct hl_asic_funcs goya_funcs = {
        .asic_dma_pool_free = goya_dma_pool_free,
        .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
        .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
-       .hl_dma_unmap_sg = goya_dma_unmap_sg,
+       .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
        .cs_parser = goya_cs_parser,
-       .asic_dma_map_sg = goya_dma_map_sg,
+       .asic_dma_map_sgtable = hl_dma_map_sgtable,
        .get_dma_desc_list_size = goya_get_dma_desc_list_size,
        .add_end_of_cb_packets = goya_add_end_of_cb_packets,
        .update_eq_ci = goya_update_eq_ci,