drm/i915: Only wait on a pending flip if we intend to write to the buffer

author Chris Wilson <chris@chris-wilson.co.uk>

Sun, 6 Mar 2011 13:51:29 +0000 (13:51 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Mon, 7 Mar 2011 11:06:02 +0000 (11:06 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Sun, 6 Mar 2011 13:51:29 +0000 (13:51 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 7 Mar 2011 11:06:02 +0000 (11:06 +0000)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d461ad5f929020c0f2b8c4d96fee44f302815bc5..8513c04dc892f71555d8601a1b5b92272eae8821 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -37,6 +37,7 @@ struct change_domains {
         uint32_t invalidate_domains;
         uint32_t flush_domains;
         uint32_t flush_rings;
+       uint32_t flips;
  };
  
  /*
@@ -190,6 +191,9 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
                 i915_gem_release_mmap(obj);
  
+       if (obj->base.pending_write_domain)
+               cd->flips |= atomic_read(&obj->pending_flip);
+
         /* The actual obj->write_domain will be updated with
          * pending_write_domain after we emit the accumulated flush for all
          * of our domain changes in execbuffers (which clears objects'
@@ -773,6 +777,39 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
         return intel_ring_sync(to, from, seqno - 1);
  }
  
+static int
+i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
+{
+       u32 plane, flip_mask;
+       int ret;
+
+       /* Emit a wait for each plane set in flips: bit 0 selects plane A, any
+        * higher bit plane B. As we only maintain a flip queue depth of 1, a
+        * WAIT for the next display flip before the batch avoids a CPU stall.
+        */
+
+       for (plane = 0; flips >> plane; plane++) {
+               if (((flips >> plane) & 1) == 0)
+                       continue;
+
+               if (plane)
+                       flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+               else
+                       flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+               ret = intel_ring_begin(ring, 2);
+               if (ret)
+                       return ret;
+
+               intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+               intel_ring_emit(ring, MI_NOOP);
+               intel_ring_advance(ring);
+       }
+
+       return 0;
+}
+
+
  static int
  i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                                 struct list_head *objects)
@@ -781,9 +818,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
         struct change_domains cd;
         int ret;
  
-       cd.invalidate_domains = 0;
-       cd.flush_domains = 0;
-       cd.flush_rings = 0;
+       memset(&cd, 0, sizeof(cd));
         list_for_each_entry(obj, objects, exec_list)
                 i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
  
@@ -796,6 +831,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
                         return ret;
         }
  
+       if (cd.flips) {
+               ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+               if (ret)
+                       return ret;
+       }
+
         list_for_each_entry(obj, objects, exec_list) {
                 ret = i915_gem_execbuffer_sync_rings(obj, ring);
                 if (ret)
@@ -842,47 +883,6 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
         return 0;
  }
  
-static int
-i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring,
-                                  struct list_head *objects)
-{
-       struct drm_i915_gem_object *obj;
-       int flips;
-
-       /* Check for any pending flips. As we only maintain a flip queue depth
-        * of 1, we can simply insert a WAIT for the next display flip prior
-        * to executing the batch and avoid stalling the CPU.
-        */
-       flips = 0;
-       list_for_each_entry(obj, objects, exec_list) {
-               if (obj->base.write_domain)
-                       flips |= atomic_read(&obj->pending_flip);
-       }
-       if (flips) {
-               int plane, flip_mask, ret;
-
-               for (plane = 0; flips >> plane; plane++) {
-                       if (((flips >> plane) & 1) == 0)
-                               continue;
-
-                       if (plane)
-                               flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-                       else
-                               flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-
-                       ret = intel_ring_begin(ring, 2);
-                       if (ret)
-                               return ret;
-
-                       intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
-                       intel_ring_emit(ring, MI_NOOP);
-                       intel_ring_advance(ring);
-               }
-       }
-
-       return 0;
-}
-
  static void
  i915_gem_execbuffer_move_to_active(struct list_head *objects,
                                    struct intel_ring_buffer *ring,
@@ -1133,10 +1133,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         if (ret)
                 goto err;
  
-       ret = i915_gem_execbuffer_wait_for_flips(ring, &objects);
-       if (ret)
-               goto err;
-
         seqno = i915_gem_next_request_seqno(ring);
         for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
                 if (seqno < ring->sync_seqno[i]) {
author	Chris Wilson <chris@chris-wilson.co.uk>
	Sun, 6 Mar 2011 13:51:29 +0000 (13:51 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Mon, 7 Mar 2011 11:06:02 +0000 (11:06 +0000)