/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "gem/i915_gem_object.h"

#include "i915_drv.h"
#include "i915_vma.h"
#include "intel_engine.h"
#include "intel_ring.h"
#include "intel_timeline.h"
unsigned int intel_ring_update_space(struct intel_ring *ring)
{
	unsigned int space;

	space = __intel_ring_space(ring->head, ring->emit, ring->size);

	ring->space = space;
	return space;
}
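/*
 * A minimal sketch of the space calculation (the real helper lives in
 * intel_ring.h): free space runs from head back to the emit offset,
 * less one cacheline, as the hardware forbids HEAD and TAIL sharing
 * the same cacheline with HEAD ahead of TAIL.
 *
 *	static inline unsigned int
 *	__intel_ring_space(unsigned int head, unsigned int tail,
 *			   unsigned int size)
 *	{
 *		GEM_BUG_ON(!is_power_of_2(size));
 *		return (head - tail - CACHELINE_BYTES) & (size - 1);
 *	}
 */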
int intel_ring_pin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;
	unsigned int flags;
	void *addr;
	int ret;

	if (atomic_fetch_inc(&ring->pin_count))
		return 0;

	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
	flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
	if (vma->obj->stolen) /* stolen is only CPU-visible via the aperture */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	ret = i915_vma_pin(vma, 0, 0, flags);
	if (unlikely(ret))
		goto err_unpin;

	if (i915_vma_is_map_and_fenceable(vma))
		addr = (void __force *)i915_vma_pin_iomap(vma);
	else
		addr = i915_gem_object_pin_map(vma->obj,
					       i915_coherent_map_type(vma->vm->i915));
	if (IS_ERR(addr)) {
		ret = PTR_ERR(addr);
		goto err_ring;
	}

	i915_vma_make_unshrinkable(vma);

	/* Discard any unused bytes beyond that submitted to hw. */
	intel_ring_reset(ring, ring->emit);

	ring->vaddr = addr;
	return 0;

err_ring:
	i915_vma_unpin(vma);
err_unpin:
	atomic_dec(&ring->pin_count);
	return ret;
}
void intel_ring_reset(struct intel_ring *ring, u32 tail)
{
	tail = intel_ring_wrap(ring, tail);
	ring->tail = tail;
	ring->head = tail;
	ring->emit = tail;

	intel_ring_update_space(ring);
}
void intel_ring_unpin(struct intel_ring *ring)
{
	struct i915_vma *vma = ring->vma;

	if (!atomic_dec_and_test(&ring->pin_count))
		return;

	i915_vma_unset_ggtt_write(vma);
	if (i915_vma_is_map_and_fenceable(vma))
		i915_vma_unpin_iomap(vma);
	else
		i915_gem_object_unpin_map(vma->obj);

	i915_vma_make_purgeable(vma);
	i915_vma_unpin(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
{
	struct i915_address_space *vm = &ggtt->vm;
	struct drm_i915_private *i915 = vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = ERR_PTR(-ENODEV);
	if (i915_ggtt_has_aperture(ggtt))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/*
	 * Mark ring buffers as read-only from the GPU side (so no stray
	 * overwrites), if supported by the platform's GGTT.
	 */
	if (vm->has_read_only)
		i915_gem_object_set_readonly(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma))
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_ring *ring;
	struct i915_vma *vma;

	GEM_BUG_ON(!is_power_of_2(size));
	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);

	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
	if (!ring)
		return ERR_PTR(-ENOMEM);

	kref_init(&ring->ref);
	ring->size = size;
	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size);

	/*
	 * Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ring->effective_size = size;
	if (IS_I830(i915) || IS_I845G(i915))
		ring->effective_size -= 2 * CACHELINE_BYTES;
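	/*
	 * Illustrative numbers (not from the source): a 16 KiB ring on
	 * i830 advertises effective_size = 16384 - 2 * 64 = 16256 bytes,
	 * keeping TAIL clear of the final two cachelines.
	 */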
	intel_ring_update_space(ring);

	vma = create_ring_vma(engine->gt->ggtt, size);
	if (IS_ERR(vma)) {
		kfree(ring);
		return ERR_CAST(vma);
	}
	ring->vma = vma;

	return ring;
}
void intel_ring_free(struct kref *ref)
{
	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);

	i915_vma_put(ring->vma);
	kfree(ring);
}
static noinline int
wait_for_space(struct intel_ring *ring,
	       struct intel_timeline *tl,
	       unsigned int bytes)
{
	struct i915_request *target;
	long timeout;

	if (intel_ring_update_space(ring) >= bytes)
		return 0;

	GEM_BUG_ON(list_empty(&tl->requests));
	list_for_each_entry(target, &tl->requests, link) {
		if (target->ring != ring)
			continue;

		/* Would completion of this request free enough space? */
		if (bytes <= __intel_ring_space(target->postfix,
						ring->emit, ring->size))
			break;
	}

	if (GEM_WARN_ON(&target->link == &tl->requests))
		return -ENOSPC;

	timeout = i915_request_wait(target,
				    I915_WAIT_INTERRUPTIBLE,
				    MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	i915_request_retire_upto(target);

	intel_ring_update_space(ring);
	GEM_BUG_ON(ring->space < bytes);
	return 0;
}
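/*
 * A worked example with illustrative numbers (not from this file):
 * for size = 4096, emit = 3968 and a target whose postfix = 512,
 * retiring that request frees (512 - 3968 - 64) & 4095 = 576 bytes,
 * so any wait for up to 576 bytes stops the list walk there.
 */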
u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
{
	struct intel_ring *ring = rq->ring;
	const unsigned int remain_usable = ring->effective_size - ring->emit;
	const unsigned int bytes = num_dwords * sizeof(u32);
	unsigned int need_wrap = 0;
	unsigned int total_bytes;
	u32 *cs;

	/* Packets must be qword aligned. */
	GEM_BUG_ON(num_dwords & 1);

	total_bytes = bytes + rq->reserved_space;
	GEM_BUG_ON(total_bytes > ring->effective_size);
	if (unlikely(total_bytes > remain_usable)) {
		const int remain_actual = ring->size - ring->emit;

		if (bytes > remain_usable) {
			/*
			 * Not enough space for the basic request. So need to
			 * flush out the remainder and then wait for
			 * base + reserved.
			 */
			total_bytes += remain_actual;
			need_wrap = remain_actual | 1;
		} else {
			/*
			 * The base request will fit but the reserved space
			 * falls off the end. So we don't need an immediate
			 * wrap and only need to effectively wait for the
			 * reserved size from the start of the ringbuffer.
			 */
			total_bytes = rq->reserved_space + remain_actual;
		}
	}
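	/*
	 * Illustrative numbers (not from the source): with size = 4096,
	 * emit = 4032 and bytes = 128, remain_usable = 64 < 128, so the
	 * 64 tail bytes will be NOOP-filled (need_wrap = 64 | 1; the low
	 * bit only keeps need_wrap non-zero when remain_actual is 0 and
	 * is cleared before use) and we must wait for 128 + reserved + 64
	 * bytes before emitting from offset 0.
	 */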
	if (unlikely(total_bytes > ring->space)) {
		int ret;

		/*
		 * Space is reserved in the ringbuffer for finalising the
		 * request, as that cannot be allowed to fail. During request
		 * finalisation, reserved_space is set to 0 to stop the
		 * overallocation and the assumption is that then we never need
		 * to wait (which has the risk of failing with EINTR).
		 *
		 * See also i915_request_alloc() and i915_request_add().
		 */
		GEM_BUG_ON(!rq->reserved_space);

		ret = wait_for_space(ring,
				     i915_request_timeline(rq),
				     total_bytes);
		if (unlikely(ret))
			return ERR_PTR(ret);
	}
	if (unlikely(need_wrap)) {
		need_wrap &= ~1;
		GEM_BUG_ON(need_wrap > ring->space);
		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));

		/* Fill the tail with MI_NOOP */
		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
		ring->emit = 0;
		ring->space -= need_wrap;
	}
	GEM_BUG_ON(ring->emit > ring->size - bytes);
	GEM_BUG_ON(ring->space < bytes);
	cs = ring->vaddr + ring->emit;
	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));

	ring->emit += bytes;
	ring->space -= bytes;

	return cs;
}
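/*
 * Typical caller-side idiom (a minimal sketch, not code from this
 * file): ask for an even number of dwords, write exactly that many,
 * then advance the ring:
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 2);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(rq, cs);
 */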
/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct i915_request *rq)
{
	int num_dwords;
	void *cs;

	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_DWORDS - num_dwords;
	GEM_BUG_ON(num_dwords & 1);

	cs = intel_ring_begin(rq, num_dwords);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
	intel_ring_advance(rq, cs + num_dwords);

	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
	return 0;
}
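/*
 * Worked example (illustrative numbers): with emit & 63 == 40, the
 * partial cacheline holds 10 dwords, so 16 - 10 = 6 MI_NOOPs (three
 * qwords via memset64) pad emit up to the next 64-byte boundary.
 */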