drm/vc4: Remove BO seqnos
authorMaíra Canal <mcanal@igalia.com>
Fri, 20 Dec 2024 13:37:09 +0000 (10:37 -0300)
committerMaíra Canal <mcanal@igalia.com>
Sun, 19 Jan 2025 18:31:00 +0000 (15:31 -0300)
`bo->seqno`, `bo->write_seqno`, and `exec->bin_dep_seqno` are leftovers
from the days before VC4 supported DMA Reservation Objects. Back then,
tracking the correspondence between BOs and jobs through the job's
seqno made sense.

However, this is no longer needed, as VC4 now supports DMA Reservation
Objects and attaches the "job done" fence to the BOs. Therefore, remove
the BO seqnos in favor of DMA Resv Objects.

Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Maxime Ripard <mripard@kernel.org>
Reviewed-by: Melissa Wen <mwen@igalia.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241220134204.634577-4-mcanal@igalia.com
drivers/gpu/drm/vc4/vc4_crtc.c
drivers/gpu/drm/vc4/vc4_drv.h
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_validate.c
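
For context, here is a minimal sketch of the dma-resv pattern this
change moves to, with hypothetical names (`sketch_attach_job_fence` is
illustrative, not code from this patch): instead of stamping
`bo->seqno` and `bo->write_seqno`, the job's "job done" fence is
published on each BO's reservation object.

```c
/*
 * Minimal sketch, not code from this patch: publish a job's "job done"
 * fence on a BO's reservation object. This is what replaces stamping
 * bo->seqno / bo->write_seqno. Names are illustrative.
 */
#include <linux/dma-fence.h>
#include <linux/dma-resv.h>

/* Caller must hold the reservation lock (dma_resv_lock()). */
static int sketch_attach_job_fence(struct dma_resv *resv,
				   struct dma_fence *job_done,
				   bool is_write)
{
	int ret;

	/* Reserve a fence slot before adding; this can fail. */
	ret = dma_resv_reserve_fences(resv, 1);
	if (ret)
		return ret;

	/* READ for BOs the job reads, WRITE for its render targets. */
	dma_resv_add_fence(resv, job_done,
			   is_write ? DMA_RESV_USAGE_WRITE
				    : DMA_RESV_USAGE_READ);
	return 0;
}
```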

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index cf40a53ad42e2e3114b191980555c93ac9f8436a..2a48038abe7aa7a2e383fd37284e76857155ad38 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -884,11 +884,7 @@ struct vc4_async_flip_state {
        struct drm_framebuffer *fb;
        struct drm_framebuffer *old_fb;
        struct drm_pending_vblank_event *event;
-
-       union {
-               struct dma_fence_cb fence;
-               struct vc4_seqno_cb seqno;
-       } cb;
+       struct dma_fence_cb cb;
 };
 
 /* Called when the V3D execution for the BO being flipped to is done, so that
@@ -919,10 +915,11 @@ vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state)
        kfree(flip_state);
 }
 
-static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
+static void vc4_async_page_flip_complete_with_cleanup(struct dma_fence *fence,
+                                                     struct dma_fence_cb *cb)
 {
        struct vc4_async_flip_state *flip_state =
-               container_of(cb, struct vc4_async_flip_state, cb.seqno);
+               container_of(cb, struct vc4_async_flip_state, cb);
        struct vc4_bo *bo = NULL;
 
        if (flip_state->old_fb) {
@@ -932,6 +929,7 @@ static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
        }
 
        vc4_async_page_flip_complete(flip_state);
+       dma_fence_put(fence);
 
        /*
         * Decrement the BO usecnt in order to keep the inc/dec
@@ -950,7 +948,7 @@ static void vc4_async_page_flip_fence_complete(struct dma_fence *fence,
                                               struct dma_fence_cb *cb)
 {
        struct vc4_async_flip_state *flip_state =
-               container_of(cb, struct vc4_async_flip_state, cb.fence);
+               container_of(cb, struct vc4_async_flip_state, cb);
 
        vc4_async_page_flip_complete(flip_state);
        dma_fence_put(fence);
@@ -961,16 +959,15 @@ static int vc4_async_set_fence_cb(struct drm_device *dev,
 {
        struct drm_framebuffer *fb = flip_state->fb;
        struct drm_gem_dma_object *dma_bo = drm_fb_dma_get_gem_obj(fb, 0);
+       dma_fence_func_t async_page_flip_complete_function;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct dma_fence *fence;
        int ret;
 
-       if (vc4->gen == VC4_GEN_4) {
-               struct vc4_bo *bo = to_vc4_bo(&dma_bo->base);
-
-               return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
-                                         vc4_async_page_flip_seqno_complete);
-       }
+       if (vc4->gen == VC4_GEN_4)
+               async_page_flip_complete_function = vc4_async_page_flip_complete_with_cleanup;
+       else
+               async_page_flip_complete_function = vc4_async_page_flip_fence_complete;
 
        ret = dma_resv_get_singleton(dma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
        if (ret)
@@ -978,14 +975,14 @@ static int vc4_async_set_fence_cb(struct drm_device *dev,
 
        /* If there's no fence, complete the page flip immediately */
        if (!fence) {
-               vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+               async_page_flip_complete_function(fence, &flip_state->cb);
                return 0;
        }
 
        /* If the fence has already been completed, complete the page flip */
-       if (dma_fence_add_callback(fence, &flip_state->cb.fence,
-                                  vc4_async_page_flip_fence_complete))
-               vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+       if (dma_fence_add_callback(fence, &flip_state->cb,
+                                  async_page_flip_complete_function))
+               async_page_flip_complete_function(fence, &flip_state->cb);
 
        return 0;
 }
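
The `dma_fence_add_callback()` calls above lean on its return value: it
returns `-ENOENT` when the fence has already signaled, and in that case
the callback will never run, so the caller must invoke the completion
path itself. A minimal sketch of the idiom (hypothetical names):

```c
/* Minimal sketch of the add-callback-or-complete-now idiom used in
 * vc4_async_set_fence_cb(); names here are hypothetical. */
#include <linux/dma-fence.h>

static void sketch_complete(struct dma_fence *fence,
			    struct dma_fence_cb *cb)
{
	/* Completion work would go here; drop the fence reference. */
	dma_fence_put(fence);
}

static void sketch_arm_callback(struct dma_fence *fence,
				struct dma_fence_cb *cb)
{
	/*
	 * dma_fence_add_callback() returns -ENOENT if the fence has
	 * already signaled; the callback is then never invoked, so
	 * complete synchronously instead.
	 */
	if (dma_fence_add_callback(fence, cb, sketch_complete))
		sketch_complete(fence, cb);
}
```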
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 4a078ffd9f822c9e6873a91db25354b28e723c71..221d8e01d5391e075aed9d7f403a9818f850f4d8 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -186,11 +186,6 @@ struct vc4_dev {
         */
        struct vc4_perfmon *active_perfmon;
 
-       /* List of struct vc4_seqno_cb for callbacks to be made from a
-        * workqueue when the given seqno is passed.
-        */
-       struct list_head seqno_cb_list;
-
        /* The memory used for storing binner tile alloc, tile state,
         * and overflow memory allocations.  This is freed when V3D
         * powers down.
@@ -247,16 +242,6 @@ struct vc4_dev {
 struct vc4_bo {
        struct drm_gem_dma_object base;
 
-       /* seqno of the last job to render using this BO. */
-       uint64_t seqno;
-
-       /* seqno of the last job to use the RCL to write to this BO.
-        *
-        * Note that this doesn't include binner overflow memory
-        * writes.
-        */
-       uint64_t write_seqno;
-
        bool t_format;
 
        /* List entry for the BO's position in either
@@ -304,12 +289,6 @@ struct vc4_fence {
 #define to_vc4_fence(_fence)                                   \
        container_of_const(_fence, struct vc4_fence, base)
 
-struct vc4_seqno_cb {
-       struct work_struct work;
-       uint64_t seqno;
-       void (*func)(struct vc4_seqno_cb *cb);
-};
-
 struct vc4_v3d {
        struct vc4_dev *vc4;
        struct platform_device *pdev;
@@ -695,9 +674,6 @@ struct vc4_exec_info {
        /* Sequence number for this bin/render job. */
        uint64_t seqno;
 
-       /* Latest write_seqno of any BO that binning depends on. */
-       uint64_t bin_dep_seqno;
-
        struct dma_fence *fence;
 
        /* Last current addresses the hardware was processing when the
@@ -1025,9 +1001,6 @@ void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
                       uint64_t timeout_ns, bool interruptible);
 void vc4_job_handle_completed(struct vc4_dev *vc4);
-int vc4_queue_seqno_cb(struct drm_device *dev,
-                      struct vc4_seqno_cb *cb, uint64_t seqno,
-                      void (*func)(struct vc4_seqno_cb *cb));
 int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
                          struct drm_file *file_priv);
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index de7be9942c1342ea55538cf51dd0e9b8d6a4ac7d..8125f87edc609fbc5dc9272e0a5a4e1cbdeb73db 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -553,27 +553,19 @@ vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
 }
 
 static void
-vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+vc4_attach_fences(struct vc4_exec_info *exec)
 {
        struct vc4_bo *bo;
        unsigned i;
 
        for (i = 0; i < exec->bo_count; i++) {
                bo = to_vc4_bo(exec->bo[i]);
-               bo->seqno = seqno;
-
                dma_resv_add_fence(bo->base.base.resv, exec->fence,
                                   DMA_RESV_USAGE_READ);
        }
 
-       list_for_each_entry(bo, &exec->unref_list, unref_head) {
-               bo->seqno = seqno;
-       }
-
        for (i = 0; i < exec->rcl_write_bo_count; i++) {
                bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
-               bo->write_seqno = seqno;
-
                dma_resv_add_fence(bo->base.base.resv, exec->fence,
                                   DMA_RESV_USAGE_WRITE);
        }
@@ -647,7 +639,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
        if (out_sync)
                drm_syncobj_replace_fence(out_sync, exec->fence);
 
-       vc4_update_bo_seqnos(exec, seqno);
+       vc4_attach_fences(exec);
 
        drm_exec_fini(exec_ctx);
 
@@ -845,12 +837,6 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
                        goto fail;
        }
 
-       /* Block waiting on any previous rendering into the CS's VBO,
-        * IB, or textures, so that pixels are actually written by the
-        * time we try to read them.
-        */
-       ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
-
 fail:
        kvfree(temp);
        return ret;
@@ -909,7 +895,6 @@ void
 vc4_job_handle_completed(struct vc4_dev *vc4)
 {
        unsigned long irqflags;
-       struct vc4_seqno_cb *cb, *cb_temp;
 
        if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
                return;
@@ -926,48 +911,9 @@ vc4_job_handle_completed(struct vc4_dev *vc4)
                spin_lock_irqsave(&vc4->job_lock, irqflags);
        }
 
-       list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
-               if (cb->seqno <= vc4->finished_seqno) {
-                       list_del_init(&cb->work.entry);
-                       schedule_work(&cb->work);
-               }
-       }
-
        spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 }
 
-static void vc4_seqno_cb_work(struct work_struct *work)
-{
-       struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
-
-       cb->func(cb);
-}
-
-int vc4_queue_seqno_cb(struct drm_device *dev,
-                      struct vc4_seqno_cb *cb, uint64_t seqno,
-                      void (*func)(struct vc4_seqno_cb *cb))
-{
-       struct vc4_dev *vc4 = to_vc4_dev(dev);
-       unsigned long irqflags;
-
-       if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
-               return -ENODEV;
-
-       cb->func = func;
-       INIT_WORK(&cb->work, vc4_seqno_cb_work);
-
-       spin_lock_irqsave(&vc4->job_lock, irqflags);
-       if (seqno > vc4->finished_seqno) {
-               cb->seqno = seqno;
-               list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
-       } else {
-               schedule_work(&cb->work);
-       }
-       spin_unlock_irqrestore(&vc4->job_lock, irqflags);
-
-       return 0;
-}
-
 /* Scheduled when any job has been completed, this walks the list of
  * jobs that had completed and unrefs their BOs and frees their exec
  * structs.
@@ -1221,7 +1167,6 @@ int vc4_gem_init(struct drm_device *dev)
        INIT_LIST_HEAD(&vc4->bin_job_list);
        INIT_LIST_HEAD(&vc4->render_job_list);
        INIT_LIST_HEAD(&vc4->job_done_list);
-       INIT_LIST_HEAD(&vc4->seqno_cb_list);
        spin_lock_init(&vc4->job_lock);
 
        INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
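
The `vc4_get_bcl()` hunk above also drops the blocking
`vc4_wait_for_seqno()` call on `exec->bin_dep_seqno`. With reservation
objects, "wait for all previous writers of this BO" can be expressed
directly against the BO's resv; a hedged sketch of that equivalent
(hypothetical name, not something this patch adds):

```c
/* Minimal sketch: a dma-resv equivalent of the removed
 * vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true) wait.
 * Illustrative only; the patch itself simply drops the call. */
#include <linux/dma-resv.h>
#include <linux/sched.h>

static int sketch_wait_prior_writes(struct dma_resv *resv)
{
	long ret;

	/* Block, interruptibly, until every WRITE-usage fence on the
	 * BO has signaled, i.e. all previous rendering into it. */
	ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_WRITE,
				    true, MAX_SCHEDULE_TIMEOUT);
	return ret < 0 ? ret : 0;
}
```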
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 5bf134968adecfc6fd2964a32836a61c011d0dfa..1e7bdda556984b41da3fcfeafdc59a0a1ecbcfdd 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -284,9 +284,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
        if (!ib)
                return -EINVAL;
 
-       exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                 to_vc4_bo(&ib->base)->write_seqno);
-
        if (offset > ib->base.size ||
            (ib->base.size - offset) / index_size < length) {
                DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
@@ -738,11 +735,6 @@ reloc_tex(struct vc4_exec_info *exec,
 
        *validated_p0 = tex->dma_addr + p0;
 
-       if (is_cs) {
-               exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                         to_vc4_bo(&tex->base)->write_seqno);
-       }
-
        return true;
  fail:
        DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
@@ -904,9 +896,6 @@ validate_gl_shader_rec(struct drm_device *dev,
                uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
                uint32_t max_index;
 
-               exec->bin_dep_seqno = max(exec->bin_dep_seqno,
-                                         to_vc4_bo(&vbo->base)->write_seqno);
-
                if (state->addr & 0x8)
                        stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;