drm/vc4: Remove BO seqnos
authorMaíra Canal <mcanal@igalia.com>
Fri, 20 Dec 2024 13:37:09 +0000 (10:37 -0300)
committerMaíra Canal <mcanal@igalia.com>
Sun, 19 Jan 2025 18:31:00 +0000 (15:31 -0300)
`bo->seqno`, `bo->write_seqno`, and `exec->bin_dep_seqno` are leftovers
from the days before VC4 supported DMA Reservation Objects. Back then,
tracking the correspondence between BOs and jobs through the job's
seqno made sense.

However, this is no longer needed, as VC4 now supports DMA Reservation
Objects and attaches the "job done" fence to the BOs. Therefore, remove
the BO seqnos in favor of DMA Resv Objects.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241220134204.634577-4-mcanal@igalia.com
drivers/gpu/drm/vc4/vc4_crtc.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_validate.c
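
For context, here is a minimal sketch of the dma-resv pattern this
change moves to, with hypothetical names (`sketch_attach_job_fence` is
illustrative, not code from this patch): instead of stamping
`bo->seqno` and `bo->write_seqno`, the job's "job done" fence is
published on each BO's reservation object.

```c
/*
 * Minimal sketch, not code from this patch: publish a job's "job done"
 * fence on a BO's reservation object. This is what replaces stamping
 * bo->seqno / bo->write_seqno. Names are illustrative.
 */
#include <linux/dma-fence.h>
#include <linux/dma-resv.h>

/* Caller must hold the reservation lock (dma_resv_lock()). */
static int sketch_attach_job_fence(struct dma_resv *resv,
				   struct dma_fence *job_done,
				   bool is_write)
{
	int ret;

	/* Reserve a fence slot before adding; this can fail. */
	ret = dma_resv_reserve_fences(resv, 1);
	if (ret)
		return ret;

	/* READ for BOs the job reads, WRITE for its render targets. */
	dma_resv_add_fence(resv, job_done,
			   is_write ? DMA_RESV_USAGE_WRITE
				    : DMA_RESV_USAGE_READ);
	return 0;
}
```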

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index cf40a53ad42e2e3114b191980555c93ac9f8436a..2a48038abe7aa7a2e383fd37284e76857155ad38 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -884,11 +884,7 @@ struct vc4_async_flip_state {
        struct drm_framebuffer *fb;
        struct drm_framebuffer *old_fb;
        struct drm_pending_vblank_event *event;
-
-       union {
-               struct dma_fence_cb fence;
-               struct vc4_seqno_cb seqno;
-       } cb;
+       struct dma_fence_cb cb;
 };
 
 /* Called when the V3D execution for the BO being flipped to is done, so that
@@ -919,10 +915,11 @@ vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state)
        kfree(flip_state);
 }
 
-static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
+static void vc4_async_page_flip_complete_with_cleanup(struct dma_fence *fence,
+                                                     struct dma_fence_cb *cb)
 {
        struct vc4_async_flip_state *flip_state =
-               container_of(cb, struct vc4_async_flip_state, cb.seqno);
+               container_of(cb, struct vc4_async_flip_state, cb);
        struct vc4_bo *bo = NULL;
 
        if (flip_state->old_fb) {
@@ -932,6 +929,7 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
        }
 
        vc4_async_page_flip_complete(flip_state);
+       dma_fence_put(fence);
 
        /*
         * Decrement the BO usecnt in order to keep the inc/dec
@@ -950,7 +948,7 @@ static void vc4_async_page_flip_fence_complete(struct dma_fence *fence,
                                               struct dma_fence_cb *cb)
 {
        struct vc4_async_flip_state *flip_state =
-               container_of(cb, struct vc4_async_flip_state, cb.fence);
+               container_of(cb, struct vc4_async_flip_state, cb);
 
        vc4_async_page_flip_complete(flip_state);
        dma_fence_put(fence);
@@ -961,16 +959,15 @@ static int vc4_async_set_fence_cb(struct drm_device *dev,
 {
        struct drm_framebuffer *fb = flip_state->fb;
        struct drm_gem_dma_object *dma_bo = drm_fb_dma_get_gem_obj(fb, 0);
+       dma_fence_func_t async_page_flip_complete_function;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct dma_fence *fence;
        int ret;
 
-       if (vc4->gen == VC4_GEN_4) {
-               struct vc4_bo *bo = to_vc4_bo(&dma_bo->base);
-
-               return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
-                                         vc4_async_page_flip_seqno_complete);
-       }
+       if (vc4->gen == VC4_GEN_4)
+               async_page_flip_complete_function = vc4_async_page_flip_complete_with_cleanup;
+       else
+               async_page_flip_complete_function = vc4_async_page_flip_fence_complete;
 
        ret = dma_resv_get_singleton(dma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
        if (ret)
@@ -978,14 +975,14 @@ static int vc4_async_set_fence_cb(struct drm_device *dev,
 
        /* If there's no fence, complete the page flip immediately */
        if (!fence) {
-               vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+               async_page_flip_complete_function(fence, &flip_state->cb);
                return 0;
        }
 
        /* If the fence has already been completed, complete the page flip */
-       if (dma_fence_add_callback(fence, &flip_state->cb.fence,
-                                  vc4_async_page_flip_fence_complete))
-               vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+       if (dma_fence_add_callback(fence, &flip_state->cb,
+                                  async_page_flip_complete_function))
+               async_page_flip_complete_function(fence, &flip_state->cb);
 
        return 0;
 }
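
The `dma_fence_add_callback()` calls above lean on its return value: it
returns `-ENOENT` when the fence has already signaled, and in that case
the callback will never run, so the caller must invoke the completion
path itself. A minimal sketch of the idiom (hypothetical names):

```c
/* Minimal sketch of the add-callback-or-complete-now idiom used in
 * vc4_async_set_fence_cb(); names here are hypothetical. */
#include <linux/dma-fence.h>

static void sketch_complete(struct dma_fence *fence,
			    struct dma_fence_cb *cb)
{
	/* Completion work would go here; drop the fence reference. */
	dma_fence_put(fence);
}

static void sketch_arm_callback(struct dma_fence *fence,
				struct dma_fence_cb *cb)
{
	/*
	 * dma_fence_add_callback() returns -ENOENT if the fence has
	 * already signaled; the callback is then never invoked, so
	 * complete synchronously instead.
	 */
	if (dma_fence_add_callback(fence, cb, sketch_complete))
		sketch_complete(fence, cb);
}
```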
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 4a078ffd9f822c9e6873a91db25354b28e723c71..221d8e01d5391e075aed9d7f403a9818f850f4d8 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -186,11 +186,6 @@ struct vc4_dev {
         */
        struct vc4_perfmon *active_perfmon;
 
-       /* List of struct vc4_seqno_cb for callbacks to be made from a
-        * workqueue when the given seqno is passed.
-        */
-       struct list_head seqno_cb_list;
-
        /* The memory used for storing binner tile alloc, tile state,
         * and overflow memory allocations.  This is freed when V3D
         * powers down.
@@ -247,16 +242,6 @@ struct vc4_dev {
 struct vc4_bo {
        struct drm_gem_dma_object base;
 
-       /* seqno of the last job to render using this BO. */
-       uint64_t seqno;
-
-       /* seqno of the last job to use the RCL to write to this BO.
-        *
-        * Note that this doesn't include binner overflow memory
-        * writes.
-        */
-       uint64_t write_seqno;
-
        bool t_format;
 
        /* List entry for the BO's position in either
@@ -304,12 +289,6 @@ struct vc4_fence {
 #define to_vc4_fence(_fence)                                   \
        container_of_const(_fence, struct vc4_fence, base)
 
-struct vc4_seqno_cb {
-       struct work_struct work;
-       uint64_t seqno;
-       void (*func)(struct vc4_seqno_cb *cb);
-};
-
 struct vc4_v3d {
        struct vc4_dev *vc4;
        struct platform_device *pdev;
@@ -695,9 +674,6 @@ struct vc4_exec_info {
        /* Sequence number for this bin/render job. */
        uint64_t seqno;
 
-       /* Latest write_seqno of any BO that binning depends on. */
-       uint64_t bin_dep_seqno;
-
        struct dma_fence *fence;
 
        /* Last current addresses the hardware was processing when the
@@ -1025,9 +1001,6 @@ void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
                       uint64_t timeout_ns, bool interruptible);
 void vc4_job_handle_completed(struct vc4_dev *vc4);
-int vc4_queue_seqno_cb(struct drm_device *dev,
-                      struct vc4_seqno_cb *cb, uint64_t seqno,
-                      void (*func)(struct vc4_seqno_cb *cb));
 int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file_priv);
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index de7be9942c1342ea55538cf51dd0e9b8d6a4ac7d..8125f87edc609fbc5dc9272e0a5a4e1cbdeb73db 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -553,27 +553,19 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
 }
 
 static void
-vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+vc4_attach_fences(struct vc4_exec_info *exec)
 {
        struct vc4_bo *bo;
        unsigned i;
 
        for (i = 0; i < exec->bo_count; i++) {
                bo = to_vc4_bo(exec->bo[i]);
-               bo->seqno = seqno;
-
                dma_resv_add_fence(bo->base.base.resv, exec->fence,
                                   DMA_RESV_USAGE_READ);
        }
 
-       list_for_each_entry(bo, &exec->unref_list, unref_head) {
-               bo->seqno = seqno;
-       }
-
        for (i = 0; i < exec->rcl_write_bo_count; i++) {
                bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
-               bo->write_seqno = seqno;
-
                dma_resv_add_fence(bo->base.base.resv, exec->fence,
                                   DMA_RESV_USAGE_WRITE);
        }
@@ -647,7 +639,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
        if (out_sync)
                drm_syncobj_replace_fence(out_sync, exec->fence);
 
-       vc4_update_bo_seqnos(exec, seqno);
+       vc4_attach_fences(exec);
 
        drm_exec_fini(exec_ctx);
 
@@ -845,12 +837,6 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
                        goto fail;
        }
 
-       /* Block waiting on any previous rendering into the CS's VBO,
-        * IB, or textures, so that pixels are actually written by the
-        * time we try to read them.
-        */
-       ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
-
 fail:
        kvfree(temp);
        return ret;
@@ -909,7 +895,6 @@ void
 vc4_job_handle_completed(struct vc4_dev *vc4)
 {
        unsigned long irqflags;
-       struct vc4_seqno_cb *cb, *cb_temp;
 
        if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
                return;
@@ -926,48 +911,9 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
                spin_lock_irqsave(&vc4->job_lock, irqflags);
        }
 
-       list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
-               if (cb->seqno <= vc4->finished_seqno) {
-                       list_del_init(&cb->work.entry);
-                       schedule_work(&cb->work);
-               }
-       }
-
        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 }
 
-static void vc4_seqno_cb_work(struct work_struct *work)
-{
-       struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
-
-       cb->func(cb);
-}
-
-int vc4_queue_seqno_cb(struct drm_device *dev,
-                      struct vc4_seqno_cb *cb, uint64_t seqno,
-                      void (*func)(struct vc4_seqno_cb *cb))
-{
-       struct vc4_dev *vc4 = to_vc4_dev(dev);
-       unsigned long irqflags;
-
-       if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
-               return -ENODEV;
-
-       cb->func = func;
-       INIT_WORK(&cb->work, vc4_seqno_cb_work);
-
-       spin_lock_irqsave(&vc4->job_lock, irqflags);
-       if (seqno > vc4->finished_seqno) {
-               cb->seqno = seqno;
-               list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
-       } else {
-               schedule_work(&cb->work);
-       }
-       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
-
-       return 0;
-}
-
 /* Scheduled when any job has been completed, this walks the list of
  * jobs that had completed and unrefs their BOs and frees their exec
  * structs.
@@ -1221,7 +1167,6 @@ int vc4_gem_init(struct drm_device *dev)
        INIT_LIST_HEAD(&vc4->bin_job_list);
        INIT_LIST_HEAD(&vc4->render_job_list);
        INIT_LIST_HEAD(&vc4->job_done_list);
-       INIT_LIST_HEAD(&vc4->seqno_cb_list);
        spin_lock_init(&vc4->job_lock);
 
        INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
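
The `vc4_get_bcl()` hunk above also drops the blocking
`vc4_wait_for_seqno()` call on `exec->bin_dep_seqno`. With reservation
objects, "wait for all previous writers of this BO" can be expressed
directly against the BO's resv; a hedged sketch of that equivalent
(hypothetical name, not something this patch adds):

```c
/* Minimal sketch: a dma-resv equivalent of the removed
 * vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true) wait.
 * Illustrative only; the patch itself simply drops the call. */
#include <linux/dma-resv.h>
#include <linux/sched.h>

static int sketch_wait_prior_writes(struct dma_resv *resv)
{
	long ret;

	/* Block, interruptibly, until every WRITE-usage fence on the
	 * BO has signaled, i.e. all previous rendering into it. */
	ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_WRITE,
				    true, MAX_SCHEDULE_TIMEOUT);
	return ret < 0 ? ret : 0;
}
```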
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 5bf134968adecfc6fd2964a32836a61c011d0dfa..1e7bdda556984b41da3fcfeafdc59a0a1ecbcfdd 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -284,9 +284,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
        if (!ib)
                return -EINVAL;
 
-       exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                 to_vc4_bo(&ib->base)->write_seqno);
-
        if (offset > ib->base.size ||
            (ib->base.size - offset) / index_size < length) {
                DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
@@ -738,11 +735,6 @@ reloc_tex(struct vc4_exec_info *exec,
 
        *validated_p0 = tex->dma_addr + p0;
 
-       if (is_cs) {
-               exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                         to_vc4_bo(&tex->base)->write_seqno);
-       }
-
        return true;
  fail:
        DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
@@ -904,9 +896,6 @@ validate_gl_shader_rec(struct drm_device *dev,
                uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
                uint32_t max_index;
 
-               exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                         to_vc4_bo(&vbo->base)->write_seqno);
-
                if (state->addr & 0x8)
                        stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;