Merge remote-tracking branch 'airlied/drm-next' into drm-intel-next-queued
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a38951d0ad33a40d5edbbc8506a9f..f48c45080a6592eb7ae0863638426225b2c6c74b 100644
@@ -279,16 +279,25 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
        .release = i915_gem_object_release_phys,
 };
 
-int
-i915_gem_object_unbind(struct drm_i915_gem_object *obj)
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
        struct i915_vma *vma;
        LIST_HEAD(still_in_list);
        int ret;
 
-       /* The vma will only be freed if it is marked as closed, and if we wait
-        * upon rendering to the vma, we may unbind anything in the list.
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+       /* Closed vma are removed from the obj->vma_list - but they may
+        * still have an active binding on the object. To remove those we
+        * must wait for all rendering to the object to complete (as unbinding
+        * must anyway), and retire the requests.
         */
+       ret = i915_gem_object_wait_rendering(obj, false);
+       if (ret)
+               return ret;
+
+       i915_gem_retire_requests(to_i915(obj->base.dev));
+
        while ((vma = list_first_entry_or_null(&obj->vma_list,
                                               struct i915_vma,
                                               obj_link))) {
@@ -2077,6 +2086,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
        list_del(&obj->global_list);
 
        if (obj->mapping) {
+               /* low bits are ignored by is_vmalloc_addr and kmap_to_page */
                if (is_vmalloc_addr(obj->mapping))
                        vunmap(obj->mapping);
                else
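
The comment added above refers to the map type being stashed in the low, page-offset bits of obj->mapping (via ptr_pack_bits()/ptr_unpack_bits() in i915_gem_object_pin_map() further down). A minimal illustration of that packing idea follows, using hypothetical helper names rather than the driver's actual macros; per the comment, is_vmalloc_addr() and kmap_to_page() ignore those low bits, so the tag need not be stripped before those two calls:

static inline void *example_pack_ptr(void *page_aligned, unsigned int bits)
{
        /* Stash a small tag in the unused low bits of a page-aligned pointer. */
        return (void *)((unsigned long)page_aligned | bits);
}

static inline void *example_unpack_ptr(void *packed, unsigned int *bits)
{
        *bits = (unsigned long)packed & ~PAGE_MASK;
        return (void *)((unsigned long)packed & PAGE_MASK);
}
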
@@ -2253,7 +2263,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 }
 
 /* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+                                enum i915_map_type type)
 {
        unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
        struct sg_table *sgt = obj->pages;
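
i915_gem_object_map() now takes the requested mapping type. For orientation, a sketch of how that selector is presumably declared (the two names are taken from this diff; the authoritative definition lives in the i915 headers, and the exact values here are an assumption):

enum i915_map_type {
        I915_MAP_WB = 0,        /* write-back: cacheable, kmap()/vmap(PAGE_KERNEL) */
        I915_MAP_WC,            /* write-combine: vmap() with pgprot_writecombine() */
};
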
@@ -2262,10 +2273,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
        struct page *stack_pages[32];
        struct page **pages = stack_pages;
        unsigned long i = 0;
+       pgprot_t pgprot;
        void *addr;
 
        /* A single page can always be kmapped */
-       if (n_pages == 1)
+       if (n_pages == 1 && type == I915_MAP_WB)
                return kmap(sg_page(sgt->sgl));
 
        if (n_pages > ARRAY_SIZE(stack_pages)) {
@@ -2281,7 +2293,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
        /* Check that we have the expected number of pages */
        GEM_BUG_ON(i != n_pages);
 
-       addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+       switch (type) {
+       case I915_MAP_WB:
+               pgprot = PAGE_KERNEL;
+               break;
+       case I915_MAP_WC:
+               pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+               break;
+       }
+       addr = vmap(pages, n_pages, 0, pgprot);
 
        if (pages != stack_pages)
                drm_free_large(pages);
@@ -2290,27 +2310,54 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 }
 
 /* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+                             enum i915_map_type type)
 {
+       enum i915_map_type has_type;
+       bool pinned;
+       void *ptr;
        int ret;
 
        lockdep_assert_held(&obj->base.dev->struct_mutex);
+       GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
        ret = i915_gem_object_get_pages(obj);
        if (ret)
                return ERR_PTR(ret);
 
        i915_gem_object_pin_pages(obj);
+       pinned = obj->pages_pin_count > 1;
 
-       if (!obj->mapping) {
-               obj->mapping = i915_gem_object_map(obj);
-               if (!obj->mapping) {
-                       i915_gem_object_unpin_pages(obj);
-                       return ERR_PTR(-ENOMEM);
+       ptr = ptr_unpack_bits(obj->mapping, has_type);
+       if (ptr && has_type != type) {
+               if (pinned) {
+                       ret = -EBUSY;
+                       goto err;
+               }
+
+               if (is_vmalloc_addr(ptr))
+                       vunmap(ptr);
+               else
+                       kunmap(kmap_to_page(ptr));
+
+               ptr = obj->mapping = NULL;
+       }
+
+       if (!ptr) {
+               ptr = i915_gem_object_map(obj, type);
+               if (!ptr) {
+                       ret = -ENOMEM;
+                       goto err;
                }
+
+               obj->mapping = ptr_pack_bits(ptr, type);
        }
 
-       return obj->mapping;
+       return ptr;
+
+err:
+       i915_gem_object_unpin_pages(obj);
+       return ERR_PTR(ret);
 }
 
 static void
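
With the mapping type threaded through, a typical caller looks roughly like the sketch below, under struct_mutex. The function name is made up for illustration, and i915_gem_object_unpin_map() is assumed to be the matching release helper:

static int example_clear_object_wc(struct drm_i915_gem_object *obj)
{
        void *vaddr;

        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
        if (IS_ERR(vaddr)) {
                /* Includes -EBUSY when the object is already mapped with a
                 * different type and that mapping is pinned by someone else.
                 */
                return PTR_ERR(vaddr);
        }

        memset(vaddr, 0, obj->base.size);       /* WC writes bypass the CPU cache */

        i915_gem_object_unpin_map(obj);
        return 0;
}
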
@@ -2423,15 +2470,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
        struct drm_i915_gem_request *request;
        struct intel_ring *ring;
 
-       request = i915_gem_active_peek(&engine->last_request,
-                                      &engine->i915->drm.struct_mutex);
-
        /* Mark all pending requests as complete so that any concurrent
         * (lockless) lookup doesn't try and wait upon the request as we
         * reset it.
         */
-       if (request)
-               intel_engine_init_seqno(engine, request->fence.seqno);
+       intel_engine_init_seqno(engine, engine->last_submitted_seqno);
 
        /*
         * Clear the execlists queue up before freeing the requests, as those
@@ -2453,6 +2496,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
         * implicit references on things like e.g. ppgtt address spaces through
         * the request.
         */
+       request = i915_gem_active_raw(&engine->last_request,
+                                     &engine->i915->drm.struct_mutex);
        if (request)
                i915_gem_request_retire_upto(request);
        GEM_BUG_ON(intel_engine_is_active(engine));
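
For context, request completion is a wrap-safe comparison of the engine's current breadcrumb against the request's seqno, which is why forcing the breadcrumb to last_submitted_seqno makes every already-submitted request read as complete. A hedged sketch of that test (helper names follow the driver of this period; treat the exact signatures as assumptions):

static bool example_request_completed(const struct drm_i915_gem_request *req)
{
        /* After intel_engine_init_seqno(engine, engine->last_submitted_seqno),
         * this holds for every request submitted before the reset.
         */
        return i915_seqno_passed(intel_engine_get_seqno(req->engine),
                                 req->fence.seqno);
}
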
@@ -2526,7 +2571,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
        struct intel_engine_cs *engine;
-       unsigned int stuck_engines;
        bool rearm_hangcheck;
 
        if (!READ_ONCE(dev_priv->gt.awake))
@@ -2556,15 +2600,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
        dev_priv->gt.awake = false;
        rearm_hangcheck = false;
 
-       /* As we have disabled hangcheck, we need to unstick any waiters still
-        * hanging around. However, as we may be racing against the interrupt
-        * handler or the waiters themselves, we skip enabling the fake-irq.
-        */
-       stuck_engines = intel_kick_waiters(dev_priv);
-       if (unlikely(stuck_engines))
-               DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
-                                stuck_engines);
-
        if (INTEL_GEN(dev_priv) >= 6)
                gen6_rps_idle(dev_priv);
        intel_runtime_pm_put(dev_priv);
@@ -3735,7 +3770,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
        i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
-static __always_inline unsigned __busy_read_flag(unsigned int id)
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
 {
        /* Note that we could alias engines in the execbuf API, but
         * that would be very unwise as it prevents userspace from
@@ -3750,10 +3785,18 @@ static __always_inline unsigned __busy_read_flag(unsigned int id)
 
 static __always_inline unsigned int __busy_write_id(unsigned int id)
 {
-       return id;
+       /* The uABI guarantees an active writer is also amongst the read
+        * engines. This would be true if we accessed the activity tracking
+        * under the lock, but as we perform the lookup of the object and
+        * its activity locklessly we can not guarantee that the last_write
+        * its activity locklessly we cannot guarantee that the last_write
+        * last_read - hence we always set both read and write busy for
+        * last_write.
+        */
+       return id | __busy_read_flag(id);
 }
 
-static __always_inline unsigned
+static __always_inline unsigned int
 __busy_set_if_active(const struct i915_gem_active *active,
                     unsigned int (*flag)(unsigned int id))
 {
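
The effect on the busy ioctl word, assuming __busy_read_flag() places per-engine read flags in the upper 16 bits as the uABI of this period does (its body lies outside this hunk): the writer's engine id occupies the low half, and OR-ing in the read flag keeps the writer visible as a reader. A small illustration, not driver code:

static unsigned int example_busy_read_flag(unsigned int id)
{
        return 0x10000u << id;          /* assumed upper-half read flags */
}

static unsigned int example_busy_write_id(unsigned int id)
{
        /* e.g. engine id 2: 2 | 0x40000 == 0x00040002, i.e. busy for
         * both write and read on that engine.
         */
        return id | example_busy_read_flag(id);
}
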
@@ -3770,19 +3813,45 @@ __busy_set_if_active(const struct i915_gem_active *active,
 
                id = request->engine->exec_id;
 
-               /* Check that the pointer wasn't reassigned and overwritten. */
+               /* Check that the pointer wasn't reassigned and overwritten.
+                *
+                * In __i915_gem_active_get_rcu(), we enforce ordering between
+                * the first rcu pointer dereference (imposing a
+                * read-dependency only on access through the pointer) and
+                * the second lockless access through the memory barrier
+                * following a successful atomic_inc_not_zero(). Here there
+                * is no such barrier, and so we must manually insert an
+                * explicit read barrier to ensure that the following
+                * access occurs after all the loads through the first
+                * pointer.
+                *
+                * It is worth comparing this sequence with
+                * raw_write_seqcount_latch() which operates very similarly.
+                * The challenge here is the visibility of the other CPU
+                * writes to the reallocated request vs the local CPU ordering.
+                * Before the other CPU can overwrite the request, it will
+                * have updated our active->request and gone through a wmb.
+                * During the read here, we want to make sure that the values
+                * we see have not been overwritten as we do so - and we do
+                * that by serialising the second pointer check with the writes
+                * on other CPUs.
+                *
+                * The corresponding write barrier is part of
+                * rcu_assign_pointer().
+                */
+               smp_rmb();
                if (request == rcu_access_pointer(active->request))
                        return flag(id);
        } while (1);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_reader(const struct i915_gem_active *active)
 {
        return __busy_set_if_active(active, __busy_read_flag);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_writer(const struct i915_gem_active *active)
 {
        return __busy_set_if_active(active, __busy_write_id);
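
The pattern the new comment describes is a lockless read-then-recheck: load the pointer, read through it, issue smp_rmb(), and only trust those reads if the pointer is unchanged afterwards. A condensed, self-contained sketch of that shape; struct item and slot are hypothetical stand-ins for the request and active->request, and the caller is assumed to hold rcu_read_lock():

struct item {
        unsigned int payload;
};

static struct item __rcu *slot;

static unsigned int example_read_stable_payload(void)
{
        struct item *p;
        unsigned int value;

        do {
                p = rcu_dereference(slot);
                if (!p)
                        return 0;

                value = p->payload;     /* may race with the slot being reused */

                /* Order the read of p->payload before the re-check below;
                 * pairs with the publisher's rcu_assign_pointer().
                 */
                smp_rmb();
        } while (p != rcu_access_pointer(slot));

        return value;
}
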
@@ -3833,9 +3902,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                        args->busy |= busy_check_reader(&obj->last_read[idx]);
 
                /* For ABI sanity, we only care that the write engine is in
-                * the set of read engines. This is ensured by the ordering
-                * of setting last_read/last_write in i915_vma_move_to_active,
-                * and then in reverse in retire.
+                * the set of read engines. This should be ensured by the
+                * ordering of setting last_read/last_write in
+                * i915_vma_move_to_active(), and then in reverse in retire.
+                * However, for good measure, we always report the last_write
+                * request as a busy read as well as being a busy write.
                 *
                 * We don't care that the set of active read/write engines
                 * may change during construction of the result, as it is