drm/amdgpu: use doorbell mgr for kfd process doorbells
author: Shashank Sharma <shashank.sharma@amd.com>
Fri, 14 Jul 2023 14:13:54 +0000 (16:13 +0200)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 7 Aug 2023 21:14:07 +0000 (17:14 -0400)
This patch:
- adds a doorbell object in kfd pdd structure.
- allocates doorbells for a process while creating its queue.
- frees the doorbells with pdd destroy.
- moves doorbell bitmap init function to kfd_doorbell.c

PS: This patch ensures that we don't break the existing KFD
    functionality, but now KFD userspace library should also
    create doorbell pages as AMDGPU GEM objects using libdrm
    functions in userspace. The reference code for the same
    is available with AMDGPU Usermode queue libdrm MR. Once
    this is done, we will not need to create process doorbells
    in kernel.

V2: - Do not use doorbell wrapper API, use amdgpu_bo_create_kernel
      instead (Alex).
    - Do not use custom doorbell structure, instead use separate
      variables for bo and doorbell_bitmap (Alex)
V3:
   - Do not allocate doorbell page with PDD, delay doorbell process
     page allocation until really needed (Felix)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index aef8e12df61f6689c684acf51620ef997533183c..f3a0efbdd145dba581e8fad62fd5d905aaf37be7 100644 (file)
@@ -333,10 +333,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
                goto err_bind_process;
        }
 
-       if (!pdd->doorbell_index &&
-           kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
-               err = -ENOMEM;
-               goto err_alloc_doorbells;
+       if (!pdd->qpd.proc_doorbells) {
+               err = kfd_alloc_process_doorbells(dev->kfd, pdd);
+               if (err) {
+                       pr_debug("failed to allocate process doorbells\n");
+                       goto err_bind_process;
+               }
        }
 
        /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
@@ -417,7 +419,6 @@ err_create_queue:
        if (wptr_bo)
                amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
 err_wptr_map_gart:
-err_alloc_doorbells:
 err_bind_process:
 err_pdd:
        mutex_unlock(&p->mutex);
@@ -2266,10 +2267,10 @@ static int criu_restore_devices(struct kfd_process *p,
                        goto exit;
                }
 
-               if (!pdd->doorbell_index &&
-                   kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
-                       ret = -ENOMEM;
-                       goto exit;
+               if (!pdd->qpd.proc_doorbells) {
+                       ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+                       if (ret)
+                               goto exit;
                }
        }
 
index ccaf85fc12c25b4a562aeec59c8a2abbeab85486..05909c366504c25d858c122d60ded2a5dfc2a4d7 100644 (file)
@@ -396,7 +396,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
                        unsigned int found;
 
                        found = find_first_zero_bit(qpd->doorbell_bitmap,
-                                               KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+                                                   KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
                        if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
                                pr_debug("No doorbells available");
                                return -EBUSY;
@@ -406,9 +406,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
                }
        }
 
-       q->properties.doorbell_off =
-               kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
-                                         q->doorbell_id);
+       q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
+                                                                 qpd->proc_doorbells,
+                                                                 q->doorbell_id);
        return 0;
 }
 
index dad81c14f6bc2495e5e3a40dc8dd20e8a4871923..216553307d3aede36609f57e52a1a542d938c6f0 100644 (file)
@@ -232,48 +232,96 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 
 }
 
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+                               struct kfd_dev *dev)
+{
+       unsigned int i;
+       int range_start = dev->shared_resources.non_cp_doorbells_start;
+       int range_end = dev->shared_resources.non_cp_doorbells_end;
+       /*
+        * Pre-mark the entries of qpd->doorbell_bitmap that must never be
+        * handed out to a queue. The bitmap is only written here, never
+        * allocated, so qpd->doorbell_bitmap must already point to a valid
+        * bitmap. Every path returns 0.
+        *
+        * Devices that are not SOC15 return early with the bitmap untouched.
+        */
+       if (!KFD_IS_SOC15(dev))
+               return 0;
+
+       /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15.
+        * The reserved range is [range_start, range_end], inclusive. Each
+        * reserved index i is marked as used together with its counterpart
+        * at i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET. Only indices below
+        * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2 are examined by the loop.
+        */
+       pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+       pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+                       range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+                       range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
+       for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
+               if (i >= range_start && i <= range_end) {
+                       __set_bit(i, qpd->doorbell_bitmap);
+                       __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+                                 qpd->doorbell_bitmap);
+               }
+       }
+
+       return 0;
+}
+
 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 {
-       if (!pdd->doorbell_index) {
-               int r = kfd_alloc_process_doorbells(pdd->dev->kfd,
-                                                   &pdd->doorbell_index);
-               if (r < 0)
+       struct amdgpu_device *adev = pdd->dev->adev;
+       uint32_t first_db_index;
+       /*
+        * Return the physical address of this process' doorbells on the
+        * device, allocating them on first use. The address is
+        * adev->doorbell.base plus the index that
+        * amdgpu_doorbell_index_on_bar() reports for doorbell 0 of the
+        * process doorbell BO, scaled by sizeof(uint32_t) per index.
+        */
+       if (!pdd->qpd.proc_doorbells) {
+               if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
+                       /* phys_addr_t 0 is error; the error code returned by
+                        * kfd_alloc_process_doorbells() is not propagated.
+                        */
                         return 0;
         }
 
-       return pdd->dev->kfd->doorbell_base +
-               pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev->kfd);
+       first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
+       return adev->doorbell.base + first_db_index * sizeof(uint32_t);
 }
 
-int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
 {
-       int r = 0;
+       int r;
+       struct qcm_process_device *qpd = &pdd->qpd;
 
-       if (!kfd->shared_resources.enable_mes)
-               r = ida_simple_get(&kfd->doorbell_ida, 1,
-                                  kfd->max_doorbell_slices, GFP_KERNEL);
-       else
-               r = amdgpu_mes_alloc_process_doorbells(
-                               (struct amdgpu_device *)kfd->adev,
-                               doorbell_index);
+       /* Allocate bitmap for dynamic doorbell allocation.
+        * This is the first of the two per-process doorbell resources set
+        * up in pdd->qpd by this function; the doorbell BO follows below.
+        * The function returns 0 on success. It returns -ENOMEM if the
+        * bitmap cannot be allocated or initialized, and otherwise the
+        * error from amdgpu_bo_create_kernel(). Any failure after this
+        * allocation goes through the err label, which frees the bitmap
+        * and resets its pointer to NULL.
+        */
+       qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+                                            GFP_KERNEL);
+       if (!qpd->doorbell_bitmap) {
+               DRM_ERROR("Failed to allocate process doorbell bitmap\n");
+               return -ENOMEM;
+       }
 
-       if (r > 0)
-               *doorbell_index = r;
+       r = init_doorbell_bitmap(&pdd->qpd, kfd);
+       if (r) {
+               DRM_ERROR("Failed to initialize process doorbells\n");
+               r = -ENOMEM;
+               goto err;
+       }
 
-       if (r < 0)
-               pr_err("Failed to allocate process doorbells\n");
+       /* Allocate doorbells for this process: a kernel BO sized by
+        * kfd_doorbell_process_slice(kfd), aligned to PAGE_SIZE and placed
+        * in AMDGPU_GEM_DOMAIN_DOORBELL. The BO pointer is stored in
+        * qpd->proc_doorbells; the two trailing NULLs mean no GPU address
+        * and no CPU mapping are requested.
+        */
+       r = amdgpu_bo_create_kernel(kfd->adev,
+                                   kfd_doorbell_process_slice(kfd),
+                                   PAGE_SIZE,
+                                   AMDGPU_GEM_DOMAIN_DOORBELL,
+                                   &qpd->proc_doorbells,
+                                   NULL,
+                                   NULL);
+       if (r) {
+               DRM_ERROR("Failed to allocate process doorbells\n");
+               goto err;
+       }
+       return 0;
 
+err:
+       bitmap_free(qpd->doorbell_bitmap);
+       qpd->doorbell_bitmap = NULL;
        return r;
 }
 
-void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
 {
-       if (doorbell_index) {
-               if (!kfd->shared_resources.enable_mes)
-                       ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
-               else
-                       amdgpu_mes_free_process_doorbells(
-                                       (struct amdgpu_device *)kfd->adev,
-                                       doorbell_index);
+       struct qcm_process_device *qpd = &pdd->qpd;
+       /*
+        * Release what kfd_alloc_process_doorbells() set up in pdd->qpd:
+        * the allocation bitmap (pointer reset to NULL afterwards) and then
+        * the doorbell BO. The kfd argument is not used by this version.
+        *
+        * NOTE(review): the BO is freed without checking whether it was
+        * ever allocated; this presumably relies on amdgpu_bo_free_kernel()
+        * accepting a NULL BO - confirm.
+        */
+       if (qpd->doorbell_bitmap) {
+               bitmap_free(qpd->doorbell_bitmap);
+               qpd->doorbell_bitmap = NULL;
        }
+
+       amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
 }
index 3cb472af64d02e944588cea204edc175dc7fa381..b9215a972c8e2d0dcbdbcad4a8ea7d5ea30d1db4 100644 (file)
@@ -708,7 +708,10 @@ struct qcm_process_device {
        uint64_t ib_base;
        void *ib_kaddr;
 
-       /* doorbell resources per process per device */
+       /* doorbells for kfd process */
+       struct amdgpu_bo *proc_doorbells;
+
+       /* bitmap for dynamic doorbell allocation from the bo */
        unsigned long *doorbell_bitmap;
 };
 
@@ -1106,9 +1109,9 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
                                        unsigned int doorbell_id);
 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
 int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
-                               unsigned int *doorbell_index);
+                               struct kfd_process_device *pdd);
 void kfd_free_process_doorbells(struct kfd_dev *kfd,
-                               unsigned int doorbell_index);
+                               struct kfd_process_device *pdd);
 /* GTT Sub-Allocator */
 
 int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
index a844e68211accf10dd188d5c6f44ebe4c3486ca0..b7e8512170ac94c52bafbba35b1932493c81bffb 100644 (file)
@@ -1035,10 +1035,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                        free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
                                get_order(KFD_CWSR_TBA_TMA_SIZE));
 
-               bitmap_free(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);
 
-               kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index);
+               kfd_free_process_doorbells(pdd->dev->kfd, pdd);
 
                if (pdd->dev->kfd->shared_resources.enable_mes)
                        amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
@@ -1550,38 +1549,6 @@ err_alloc_process:
        return ERR_PTR(err);
 }
 
-static int init_doorbell_bitmap(struct qcm_process_device *qpd,
-                       struct kfd_dev *dev)
-{
-       unsigned int i;
-       int range_start = dev->shared_resources.non_cp_doorbells_start;
-       int range_end = dev->shared_resources.non_cp_doorbells_end;
-
-       if (!KFD_IS_SOC15(dev))
-               return 0;
-
-       qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
-                                            GFP_KERNEL);
-       if (!qpd->doorbell_bitmap)
-               return -ENOMEM;
-
-       /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
-       pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
-       pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
-                       range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
-                       range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
-
-       for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
-               if (i >= range_start && i <= range_end) {
-                       __set_bit(i, qpd->doorbell_bitmap);
-                       __set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
-                                 qpd->doorbell_bitmap);
-               }
-       }
-
-       return 0;
-}
-
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
                                                        struct kfd_process *p)
 {
@@ -1606,11 +1573,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
        if (!pdd)
                return NULL;
 
-       if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) {
-               pr_err("Failed to init doorbell for process\n");
-               goto err_free_pdd;
-       }
-
        pdd->dev = dev;
        INIT_LIST_HEAD(&pdd->qpd.queues_list);
        INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
index 60e6b37b43badaab41ad0f63ac9263a5798c6071..adb5e4bdc0b2042053fb68dd1f65c256241f3978 100644 (file)
@@ -368,17 +368,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
                goto err_create_queue;
        }
 
-       if (q && p_doorbell_offset_in_process)
+       if (q && p_doorbell_offset_in_process) {
                /* Return the doorbell offset within the doorbell page
                 * to the caller so it can be passed up to user mode
                 * (in bytes).
-                * There are always 1024 doorbells per process, so in case
-                * of 8-byte doorbells, there are two doorbell pages per
-                * process.
+                * relative doorbell index = Absolute doorbell index -
+                * absolute index of first doorbell in the page.
                 */
-               *p_doorbell_offset_in_process =
-                       (q->properties.doorbell_off * sizeof(uint32_t)) &
-                       (kfd_doorbell_process_slice(dev->kfd) - 1);
+               uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+                                                                      pdd->qpd.proc_doorbells,
+                                                                      0);
+
+               *p_doorbell_offset_in_process = (q->properties.doorbell_off
+                                               - first_db_index) * sizeof(uint32_t);
+       }
 
        pr_debug("PQM After DQM create queue\n");
 
@@ -933,12 +936,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
                goto exit;
        }
 
-       if (!pdd->doorbell_index &&
-           kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
-               ret = -ENOMEM;
-               goto exit;
-       }
-
        /* data stored in this order: mqd, ctl_stack */
        mqd = q_extra_data;
        ctl_stack = mqd + q_data->mqd_size;