drivers/gpu/drm/i915/i915_gem_execbuffer.c
1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35
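/*
 * eb_objects is a small hash table, with per-bucket hlist chains, that maps
 * execbuffer handles to their GEM objects so relocation targets can be
 * looked up quickly.  'and' is the bucket mask: the bucket count minus one,
 * with the count always a power of two.
 */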
36 struct eb_objects {
37         int and;
38         struct hlist_head buckets[0];
39 };
40
41 static struct eb_objects *
42 eb_create(int size)
43 {
44         struct eb_objects *eb;
45         int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
46         BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
47         while (count > size)
48                 count >>= 1;
49         eb = kzalloc(count*sizeof(struct hlist_head) +
50                      sizeof(struct eb_objects),
51                      GFP_KERNEL);
52         if (eb == NULL)
53                 return eb;
54
55         eb->and = count - 1;
56         return eb;
57 }
58
59 static void
60 eb_reset(struct eb_objects *eb)
61 {
62         memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
63 }
64
65 static void
66 eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
67 {
68         hlist_add_head(&obj->exec_node,
69                        &eb->buckets[obj->exec_handle & eb->and]);
70 }
71
72 static struct drm_i915_gem_object *
73 eb_get_object(struct eb_objects *eb, unsigned long handle)
74 {
75         struct hlist_head *head;
76         struct hlist_node *node;
77         struct drm_i915_gem_object *obj;
78
79         head = &eb->buckets[handle & eb->and];
80         hlist_for_each(node, head) {
81                 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
82                 if (obj->exec_handle == handle)
83                         return obj;
84         }
85
86         return NULL;
87 }
88
89 static void
90 eb_destroy(struct eb_objects *eb)
91 {
92         kfree(eb);
93 }
94
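/*
 * A relocation is written through a CPU kmap rather than the GTT aperture
 * when the object is already in the CPU write domain, cannot be mapped and
 * fenced in the aperture, or is not I915_CACHE_NONE.
 */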
95 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
96 {
97         return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
98                 !obj->map_and_fenceable ||
99                 obj->cache_level != I915_CACHE_NONE);
100 }
101
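/*
 * Apply a single relocation entry: look the target up in the handle table,
 * validate the read/write domains requested by userspace and, if the
 * presumed offset is stale, write target_offset + delta into the object at
 * reloc->offset, either through a CPU kmap or through the GTT aperture.
 */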
102 static int
103 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
104                                    struct eb_objects *eb,
105                                    struct drm_i915_gem_relocation_entry *reloc)
106 {
107         struct drm_device *dev = obj->base.dev;
108         struct drm_gem_object *target_obj;
109         struct drm_i915_gem_object *target_i915_obj;
110         uint32_t target_offset;
111         int ret = -EINVAL;
112
113         /* we already hold a reference to all valid objects */
114         target_obj = &eb_get_object(eb, reloc->target_handle)->base;
115         if (unlikely(target_obj == NULL))
116                 return -ENOENT;
117
118         target_i915_obj = to_intel_bo(target_obj);
119         target_offset = target_i915_obj->gtt_offset;
120
121         /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
122          * pipe_control writes because the gpu doesn't properly redirect them
123          * through the ppgtt for non-secure batchbuffers. */
124         if (unlikely(IS_GEN6(dev) &&
125             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
126             !target_i915_obj->has_global_gtt_mapping)) {
127                 i915_gem_gtt_bind_object(target_i915_obj,
128                                          target_i915_obj->cache_level);
129         }
130
131         /* The target buffer should have appeared before us in the
132          * exec_object list, so it should have a GTT space bound by now.
133          */
134         if (unlikely(target_offset == 0)) {
135                 DRM_DEBUG("No GTT space found for object %d\n",
136                           reloc->target_handle);
137                 return ret;
138         }
139
140         /* Validate that the target is in a valid r/w GPU domain */
141         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
142                 DRM_DEBUG("reloc with multiple write domains: "
143                           "obj %p target %d offset %d "
144                           "read %08x write %08x",
145                           obj, reloc->target_handle,
146                           (int) reloc->offset,
147                           reloc->read_domains,
148                           reloc->write_domain);
149                 return ret;
150         }
151         if (unlikely((reloc->write_domain | reloc->read_domains)
152                      & ~I915_GEM_GPU_DOMAINS)) {
153                 DRM_DEBUG("reloc with read/write non-GPU domains: "
154                           "obj %p target %d offset %d "
155                           "read %08x write %08x",
156                           obj, reloc->target_handle,
157                           (int) reloc->offset,
158                           reloc->read_domains,
159                           reloc->write_domain);
160                 return ret;
161         }
162         if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
163                      reloc->write_domain != target_obj->pending_write_domain)) {
164                 DRM_DEBUG("Write domain conflict: "
165                           "obj %p target %d offset %d "
166                           "new %08x old %08x\n",
167                           obj, reloc->target_handle,
168                           (int) reloc->offset,
169                           reloc->write_domain,
170                           target_obj->pending_write_domain);
171                 return ret;
172         }
173
174         target_obj->pending_read_domains |= reloc->read_domains;
175         target_obj->pending_write_domain |= reloc->write_domain;
176
177         /* If the relocation already has the right value in it, no
178          * more work needs to be done.
179          */
180         if (target_offset == reloc->presumed_offset)
181                 return 0;
182
183         /* Check that the relocation address is valid... */
184         if (unlikely(reloc->offset > obj->base.size - 4)) {
185                 DRM_DEBUG("Relocation beyond object bounds: "
186                           "obj %p target %d offset %d size %d.\n",
187                           obj, reloc->target_handle,
188                           (int) reloc->offset,
189                           (int) obj->base.size);
190                 return ret;
191         }
192         if (unlikely(reloc->offset & 3)) {
193                 DRM_DEBUG("Relocation not 4-byte aligned: "
194                           "obj %p target %d offset %d.\n",
195                           obj, reloc->target_handle,
196                           (int) reloc->offset);
197                 return ret;
198         }
199
200         /* We can't wait for rendering with pagefaults disabled */
201         if (obj->active && in_atomic())
202                 return -EFAULT;
203
204         reloc->delta += target_offset;
205         if (use_cpu_reloc(obj)) {
206                 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
207                 char *vaddr;
208
209                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
210                 if (ret)
211                         return ret;
212
213                 vaddr = kmap_atomic(i915_gem_object_get_page(obj,
214                                                              reloc->offset >> PAGE_SHIFT));
215                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
216                 kunmap_atomic(vaddr);
217         } else {
218                 struct drm_i915_private *dev_priv = dev->dev_private;
219                 uint32_t __iomem *reloc_entry;
220                 void __iomem *reloc_page;
221
222                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
223                 if (ret)
224                         return ret;
225
226                 ret = i915_gem_object_put_fence(obj);
227                 if (ret)
228                         return ret;
229
230                 /* Map the page containing the relocation we're going to perform.  */
231                 reloc->offset += obj->gtt_offset;
232                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
233                                                       reloc->offset & PAGE_MASK);
234                 reloc_entry = (uint32_t __iomem *)
235                         (reloc_page + (reloc->offset & ~PAGE_MASK));
236                 iowrite32(reloc->delta, reloc_entry);
237                 io_mapping_unmap_atomic(reloc_page);
238         }
239
240         /* and update the user's relocation entry */
241         reloc->presumed_offset = target_offset;
242
243         return 0;
244 }
245
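/*
 * Fast-path relocation processing: copy the user's relocation entries onto
 * the stack in small chunks with __copy_from_user_inatomic() (the caller
 * has pagefaults disabled), apply them, and write back any presumed_offset
 * that changed.
 */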
246 static int
247 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
248                                     struct eb_objects *eb)
249 {
250 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
251         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
252         struct drm_i915_gem_relocation_entry __user *user_relocs;
253         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
254         int remain, ret;
255
256         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
257
258         remain = entry->relocation_count;
259         while (remain) {
260                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
261                 int count = remain;
262                 if (count > ARRAY_SIZE(stack_reloc))
263                         count = ARRAY_SIZE(stack_reloc);
264                 remain -= count;
265
266                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
267                         return -EFAULT;
268
269                 do {
270                         u64 offset = r->presumed_offset;
271
272                         ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
273                         if (ret)
274                                 return ret;
275
276                         if (r->presumed_offset != offset &&
277                             __copy_to_user_inatomic(&user_relocs->presumed_offset,
278                                                     &r->presumed_offset,
279                                                     sizeof(r->presumed_offset))) {
280                                 return -EFAULT;
281                         }
282
283                         user_relocs++;
284                         r++;
285                 } while (--count);
286         }
287
288         return 0;
289 #undef N_RELOC
290 }
291
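/*
 * Slow-path variant: the relocation entries have already been copied into
 * kernel memory by i915_gem_execbuffer_relocate_slow(), so they can be
 * applied directly.
 */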
292 static int
293 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
294                                          struct eb_objects *eb,
295                                          struct drm_i915_gem_relocation_entry *relocs)
296 {
297         const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
298         int i, ret;
299
300         for (i = 0; i < entry->relocation_count; i++) {
301                 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
302                 if (ret)
303                         return ret;
304         }
305
306         return 0;
307 }
308
309 static int
310 i915_gem_execbuffer_relocate(struct drm_device *dev,
311                              struct eb_objects *eb,
312                              struct list_head *objects)
313 {
314         struct drm_i915_gem_object *obj;
315         int ret = 0;
316
317         /* This is the fast path and we cannot handle a pagefault whilst
318          * holding the struct mutex lest the user pass in the relocations
319          * contained within an mmapped bo. In such a case the page fault
320          * handler would call i915_gem_fault() and we would try to acquire
321          * the struct mutex again. Obviously this is bad, and so lockdep
322          * complains vehemently.
323          */
324         pagefault_disable();
325         list_for_each_entry(obj, objects, exec_list) {
326                 ret = i915_gem_execbuffer_relocate_object(obj, eb);
327                 if (ret)
328                         break;
329         }
330         pagefault_enable();
331
332         return ret;
333 }
334
335 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
336 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
337
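/*
 * A mappable GTT binding is only required when relocations will be written
 * through the aperture rather than via the CPU.
 */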
338 static int
339 need_reloc_mappable(struct drm_i915_gem_object *obj)
340 {
341         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
342         return entry->relocation_count && !use_cpu_reloc(obj);
343 }
344
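/*
 * Pin a single object into the GTT for this execbuffer: honour the
 * requested alignment and mappability, grab and pin a fence register when
 * one is needed for GPU access, make sure an aliasing ppgtt mapping exists,
 * and report the chosen offset back through entry->offset.
 */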
345 static int
346 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
347                                    struct intel_ring_buffer *ring)
348 {
349         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
350         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
351         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
352         bool need_fence, need_mappable;
353         int ret;
354
355         need_fence =
356                 has_fenced_gpu_access &&
357                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
358                 obj->tiling_mode != I915_TILING_NONE;
359         need_mappable = need_fence || need_reloc_mappable(obj);
360
361         ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
362         if (ret)
363                 return ret;
364
365         entry->flags |= __EXEC_OBJECT_HAS_PIN;
366
367         if (has_fenced_gpu_access) {
368                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
369                         ret = i915_gem_object_get_fence(obj);
370                         if (ret)
371                                 return ret;
372
373                         if (i915_gem_object_pin_fence(obj))
374                                 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
375
376                         obj->pending_fenced_gpu_access = true;
377                 }
378         }
379
380         /* Ensure ppgtt mapping exists if needed */
381         if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
382                 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
383                                        obj, obj->cache_level);
384
385                 obj->has_aliasing_ppgtt_mapping = 1;
386         }
387
388         entry->offset = obj->gtt_offset;
389         return 0;
390 }
391
392 static void
393 i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
394 {
395         struct drm_i915_gem_exec_object2 *entry;
396
397         if (!obj->gtt_space)
398                 return;
399
400         entry = obj->exec_entry;
401
402         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
403                 i915_gem_object_unpin_fence(obj);
404
405         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
406                 i915_gem_object_unpin(obj);
407
408         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
409 }
410
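/*
 * Reserve GTT space for every object in the execbuffer.  Objects that need
 * to be mappable (for fencing or GTT relocations) are moved to the front of
 * the list so they are bound first; the retry loop below falls back to
 * evicting everything once if binding fails with -ENOSPC.
 */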
411 static int
412 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
413                             struct drm_file *file,
414                             struct list_head *objects)
415 {
416         struct drm_i915_gem_object *obj;
417         struct list_head ordered_objects;
418         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
419         int retry;
420
421         INIT_LIST_HEAD(&ordered_objects);
422         while (!list_empty(objects)) {
423                 struct drm_i915_gem_exec_object2 *entry;
424                 bool need_fence, need_mappable;
425
426                 obj = list_first_entry(objects,
427                                        struct drm_i915_gem_object,
428                                        exec_list);
429                 entry = obj->exec_entry;
430
431                 need_fence =
432                         has_fenced_gpu_access &&
433                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
434                         obj->tiling_mode != I915_TILING_NONE;
435                 need_mappable = need_fence || need_reloc_mappable(obj);
436
437                 if (need_mappable)
438                         list_move(&obj->exec_list, &ordered_objects);
439                 else
440                         list_move_tail(&obj->exec_list, &ordered_objects);
441
442                 obj->base.pending_read_domains = 0;
443                 obj->base.pending_write_domain = 0;
444                 obj->pending_fenced_gpu_access = false;
445         }
446         list_splice(&ordered_objects, objects);
447
448         /* Attempt to pin all of the buffers into the GTT.
449          * This is done in 3 phases:
450          *
451          * 1a. Unbind all objects that do not match the GTT constraints for
452          *     the execbuffer (fenceable, mappable, alignment etc).
453          * 1b. Increment pin count for already bound objects.
454          * 2.  Bind new objects.
455          * 3.  Decrement pin count.
456          *
457          * This avoids unnecessary unbinding of later objects in order to make
458          * room for the earlier objects *unless* we need to defragment.
459          */
460         retry = 0;
461         do {
462                 int ret = 0;
463
464                 /* Unbind any ill-fitting objects or pin. */
465                 list_for_each_entry(obj, objects, exec_list) {
466                         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
467                         bool need_fence, need_mappable;
468
469                         if (!obj->gtt_space)
470                                 continue;
471
472                         need_fence =
473                                 has_fenced_gpu_access &&
474                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
475                                 obj->tiling_mode != I915_TILING_NONE;
476                         need_mappable = need_fence || need_reloc_mappable(obj);
477
478                         if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
479                             (need_mappable && !obj->map_and_fenceable))
480                                 ret = i915_gem_object_unbind(obj);
481                         else
482                                 ret = i915_gem_execbuffer_reserve_object(obj, ring);
483                         if (ret)
484                                 goto err;
485                 }
486
487                 /* Bind fresh objects */
488                 list_for_each_entry(obj, objects, exec_list) {
489                         if (obj->gtt_space)
490                                 continue;
491
492                         ret = i915_gem_execbuffer_reserve_object(obj, ring);
493                         if (ret)
494                                 goto err;
495                 }
496
497 err:            /* Decrement pin count for bound objects */
498                 list_for_each_entry(obj, objects, exec_list)
499                         i915_gem_execbuffer_unreserve_object(obj);
500
501                 if (ret != -ENOSPC || retry++)
502                         return ret;
503
504                 ret = i915_gem_evict_everything(ring->dev);
505                 if (ret)
506                         return ret;
507         } while (1);
508 }
509
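/*
 * Slow path taken when the fast relocation path faults: drop the object
 * references and the struct mutex, copy every relocation array from
 * userspace with pagefaults enabled, then retake the lock, look the objects
 * up again, re-reserve them and replay the relocations from the kernel copy.
 */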
510 static int
511 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
512                                   struct drm_file *file,
513                                   struct intel_ring_buffer *ring,
514                                   struct list_head *objects,
515                                   struct eb_objects *eb,
516                                   struct drm_i915_gem_exec_object2 *exec,
517                                   int count)
518 {
519         struct drm_i915_gem_relocation_entry *reloc;
520         struct drm_i915_gem_object *obj;
521         int *reloc_offset;
522         int i, total, ret;
523
524         /* We may process another execbuffer during the unlock... */
525         while (!list_empty(objects)) {
526                 obj = list_first_entry(objects,
527                                        struct drm_i915_gem_object,
528                                        exec_list);
529                 list_del_init(&obj->exec_list);
530                 drm_gem_object_unreference(&obj->base);
531         }
532
533         mutex_unlock(&dev->struct_mutex);
534
535         total = 0;
536         for (i = 0; i < count; i++)
537                 total += exec[i].relocation_count;
538
539         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
540         reloc = drm_malloc_ab(total, sizeof(*reloc));
541         if (reloc == NULL || reloc_offset == NULL) {
542                 drm_free_large(reloc);
543                 drm_free_large(reloc_offset);
544                 mutex_lock(&dev->struct_mutex);
545                 return -ENOMEM;
546         }
547
548         total = 0;
549         for (i = 0; i < count; i++) {
550                 struct drm_i915_gem_relocation_entry __user *user_relocs;
551
552                 user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
553
554                 if (copy_from_user(reloc+total, user_relocs,
555                                    exec[i].relocation_count * sizeof(*reloc))) {
556                         ret = -EFAULT;
557                         mutex_lock(&dev->struct_mutex);
558                         goto err;
559                 }
560
561                 reloc_offset[i] = total;
562                 total += exec[i].relocation_count;
563         }
564
565         ret = i915_mutex_lock_interruptible(dev);
566         if (ret) {
567                 mutex_lock(&dev->struct_mutex);
568                 goto err;
569         }
570
571         /* reacquire the objects */
572         eb_reset(eb);
573         for (i = 0; i < count; i++) {
574                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
575                                                         exec[i].handle));
576                 if (&obj->base == NULL) {
577                         DRM_DEBUG("Invalid object handle %d at index %d\n",
578                                    exec[i].handle, i);
579                         ret = -ENOENT;
580                         goto err;
581                 }
582
583                 list_add_tail(&obj->exec_list, objects);
584                 obj->exec_handle = exec[i].handle;
585                 obj->exec_entry = &exec[i];
586                 eb_add_object(eb, obj);
587         }
588
589         ret = i915_gem_execbuffer_reserve(ring, file, objects);
590         if (ret)
591                 goto err;
592
593         list_for_each_entry(obj, objects, exec_list) {
594                 int offset = obj->exec_entry - exec;
595                 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
596                                                                reloc + reloc_offset[offset]);
597                 if (ret)
598                         goto err;
599         }
600
601         /* Leave the user relocations as they are; this is the painfully slow path,
602          * and we want to avoid the complication of dropping the lock whilst
603          * having buffers reserved in the aperture and so causing spurious
604          * ENOSPC for random operations.
605          */
606
607 err:
608         drm_free_large(reloc);
609         drm_free_large(reloc_offset);
610         return ret;
611 }
612
613 static int
614 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
615 {
616         u32 plane, flip_mask;
617         int ret;
618
619         /* Check for any pending flips. As we only maintain a flip queue depth
620          * of 1, we can simply insert a WAIT for the next display flip prior
621          * to executing the batch and avoid stalling the CPU.
622          */
623
624         for (plane = 0; flips >> plane; plane++) {
625                 if (((flips >> plane) & 1) == 0)
626                         continue;
627
628                 if (plane)
629                         flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
630                 else
631                         flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
632
633                 ret = intel_ring_begin(ring, 2);
634                 if (ret)
635                         return ret;
636
637                 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
638                 intel_ring_emit(ring, MI_NOOP);
639                 intel_ring_advance(ring);
640         }
641
642         return 0;
643 }
644
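/*
 * Prepare every object for execution on the target ring: synchronise with
 * rendering on other rings, flush CPU write domains (clflush plus a chipset
 * flush), wait for any pending page flips and invalidate the GPU caches.
 */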
645 static int
646 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
647                                 struct list_head *objects)
648 {
649         struct drm_i915_gem_object *obj;
650         uint32_t flush_domains = 0;
651         uint32_t flips = 0;
652         int ret;
653
654         list_for_each_entry(obj, objects, exec_list) {
655                 ret = i915_gem_object_sync(obj, ring);
656                 if (ret)
657                         return ret;
658
659                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
660                         i915_gem_clflush_object(obj);
661
662                 if (obj->base.pending_write_domain)
663                         flips |= atomic_read(&obj->pending_flip);
664
665                 flush_domains |= obj->base.write_domain;
666         }
667
668         if (flips) {
669                 ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
670                 if (ret)
671                         return ret;
672         }
673
674         if (flush_domains & I915_GEM_DOMAIN_CPU)
675                 i915_gem_chipset_flush(ring->dev);
676
677         if (flush_domains & I915_GEM_DOMAIN_GTT)
678                 wmb();
679
680         /* Unconditionally invalidate gpu caches and ensure that we do flush
681          * any residual writes from the previous batch.
682          */
683         return intel_ring_invalidate_all_caches(ring);
684 }
685
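/* The batch start offset and batch length must both be 8-byte aligned. */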
686 static bool
687 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
688 {
689         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
690 }
691
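/*
 * Pre-validate the user's relocation lists: reject relocation counts whose
 * byte length would overflow, check that each array is both readable and
 * writable, and prefault the pages so that the atomic copies in the fast
 * path are less likely to fault.
 */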
692 static int
693 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
694                    int count)
695 {
696         int i;
697
698         for (i = 0; i < count; i++) {
699                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
700                 int length; /* limited by fault_in_pages_readable() */
701
702                 /* First check for malicious input causing overflow */
703                 if (exec[i].relocation_count >
704                     INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
705                         return -EINVAL;
706
707                 length = exec[i].relocation_count *
708                         sizeof(struct drm_i915_gem_relocation_entry);
709                 if (!access_ok(VERIFY_READ, ptr, length))
710                         return -EFAULT;
711
712                 /* we may also need to update the presumed offsets */
713                 if (!access_ok(VERIFY_WRITE, ptr, length))
714                         return -EFAULT;
715
716                 if (fault_in_multipages_readable(ptr, length))
717                         return -EFAULT;
718         }
719
720         return 0;
721 }
722
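/*
 * After dispatch, commit the pending read/write domains on each object and
 * move it to the ring's active list under the new seqno; written objects
 * are marked dirty and, if still pinned, flagged via intel_mark_fb_busy()
 * as potential scanouts.
 */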
723 static void
724 i915_gem_execbuffer_move_to_active(struct list_head *objects,
725                                    struct intel_ring_buffer *ring,
726                                    u32 seqno)
727 {
728         struct drm_i915_gem_object *obj;
729
730         list_for_each_entry(obj, objects, exec_list) {
731                 u32 old_read = obj->base.read_domains;
732                 u32 old_write = obj->base.write_domain;
733
734                 obj->base.read_domains = obj->base.pending_read_domains;
735                 obj->base.write_domain = obj->base.pending_write_domain;
736                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
737
738                 i915_gem_object_move_to_active(obj, ring, seqno);
739                 if (obj->base.write_domain) {
740                         obj->dirty = 1;
741                         obj->last_write_seqno = seqno;
742                         if (obj->pin_count) /* check for potential scanout */
743                                 intel_mark_fb_busy(obj);
744                 }
745
746                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
747         }
748 }
749
750 static void
751 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
752                                     struct drm_file *file,
753                                     struct intel_ring_buffer *ring)
754 {
755         /* Unconditionally force add_request to emit a full flush. */
756         ring->gpu_caches_dirty = true;
757
758         /* Add a breadcrumb for the completion of the batch buffer */
759         (void)i915_add_request(ring, file, NULL);
760 }
761
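/*
 * For I915_EXEC_GEN7_SOL_RESET on the gen7 render ring, zero the four
 * GEN7_SO_WRITE_OFFSET registers with MI_LOAD_REGISTER_IMM before the
 * batch runs.
 */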
762 static int
763 i915_reset_gen7_sol_offsets(struct drm_device *dev,
764                             struct intel_ring_buffer *ring)
765 {
766         drm_i915_private_t *dev_priv = dev->dev_private;
767         int ret, i;
768
769         if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
770                 return 0;
771
772         ret = intel_ring_begin(ring, 4 * 3);
773         if (ret)
774                 return ret;
775
776         for (i = 0; i < 4; i++) {
777                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
778                 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
779                 intel_ring_emit(ring, 0);
780         }
781
782         intel_ring_advance(ring);
783
784         return 0;
785 }
786
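/*
 * Common execbuffer path shared by the legacy and execbuffer2 ioctls:
 * validate the arguments, look up and reserve every object, apply the
 * relocations, flush and invalidate as required, and dispatch the batch on
 * the selected ring (one dispatch per cliprect when cliprects are given).
 */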
787 static int
788 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
789                        struct drm_file *file,
790                        struct drm_i915_gem_execbuffer2 *args,
791                        struct drm_i915_gem_exec_object2 *exec)
792 {
793         drm_i915_private_t *dev_priv = dev->dev_private;
794         struct list_head objects;
795         struct eb_objects *eb;
796         struct drm_i915_gem_object *batch_obj;
797         struct drm_clip_rect *cliprects = NULL;
798         struct intel_ring_buffer *ring;
799         u32 ctx_id = i915_execbuffer2_get_context_id(*args);
800         u32 exec_start, exec_len;
801         u32 seqno;
802         u32 mask;
803         u32 flags;
804         int ret, mode, i;
805
806         if (!i915_gem_check_execbuffer(args)) {
807                 DRM_DEBUG("execbuf with invalid offset/length\n");
808                 return -EINVAL;
809         }
810
811         ret = validate_exec_list(exec, args->buffer_count);
812         if (ret)
813                 return ret;
814
815         flags = 0;
816         if (args->flags & I915_EXEC_SECURE) {
817                 if (!file->is_master || !capable(CAP_SYS_ADMIN))
818                     return -EPERM;
819
820                 flags |= I915_DISPATCH_SECURE;
821         }
822
823         switch (args->flags & I915_EXEC_RING_MASK) {
824         case I915_EXEC_DEFAULT:
825         case I915_EXEC_RENDER:
826                 ring = &dev_priv->ring[RCS];
827                 break;
828         case I915_EXEC_BSD:
829                 ring = &dev_priv->ring[VCS];
830                 if (ctx_id != 0) {
831                         DRM_DEBUG("Ring %s doesn't support contexts\n",
832                                   ring->name);
833                         return -EPERM;
834                 }
835                 break;
836         case I915_EXEC_BLT:
837                 ring = &dev_priv->ring[BCS];
838                 if (ctx_id != 0) {
839                         DRM_DEBUG("Ring %s doesn't support contexts\n",
840                                   ring->name);
841                         return -EPERM;
842                 }
843                 break;
844         default:
845                 DRM_DEBUG("execbuf with unknown ring: %d\n",
846                           (int)(args->flags & I915_EXEC_RING_MASK));
847                 return -EINVAL;
848         }
849         if (!intel_ring_initialized(ring)) {
850                 DRM_DEBUG("execbuf with invalid ring: %d\n",
851                           (int)(args->flags & I915_EXEC_RING_MASK));
852                 return -EINVAL;
853         }
854
855         mode = args->flags & I915_EXEC_CONSTANTS_MASK;
856         mask = I915_EXEC_CONSTANTS_MASK;
857         switch (mode) {
858         case I915_EXEC_CONSTANTS_REL_GENERAL:
859         case I915_EXEC_CONSTANTS_ABSOLUTE:
860         case I915_EXEC_CONSTANTS_REL_SURFACE:
861                 if (ring == &dev_priv->ring[RCS] &&
862                     mode != dev_priv->relative_constants_mode) {
863                         if (INTEL_INFO(dev)->gen < 4)
864                                 return -EINVAL;
865
866                         if (INTEL_INFO(dev)->gen > 5 &&
867                             mode == I915_EXEC_CONSTANTS_REL_SURFACE)
868                                 return -EINVAL;
869
870                         /* The HW changed the meaning of this bit on gen6 */
871                         if (INTEL_INFO(dev)->gen >= 6)
872                                 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
873                 }
874                 break;
875         default:
876                 DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
877                 return -EINVAL;
878         }
879
880         if (args->buffer_count < 1) {
881                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
882                 return -EINVAL;
883         }
884
885         if (args->num_cliprects != 0) {
886                 if (ring != &dev_priv->ring[RCS]) {
887                         DRM_DEBUG("clip rectangles are only valid with the render ring\n");
888                         return -EINVAL;
889                 }
890
891                 if (INTEL_INFO(dev)->gen >= 5) {
892                         DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
893                         return -EINVAL;
894                 }
895
896                 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
897                         DRM_DEBUG("execbuf with %u cliprects\n",
898                                   args->num_cliprects);
899                         return -EINVAL;
900                 }
901
902                 cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
903                                     GFP_KERNEL);
904                 if (cliprects == NULL) {
905                         ret = -ENOMEM;
906                         goto pre_mutex_err;
907                 }
908
909                 if (copy_from_user(cliprects,
910                                      (struct drm_clip_rect __user *)(uintptr_t)
911                                      args->cliprects_ptr,
912                                      sizeof(*cliprects)*args->num_cliprects)) {
913                         ret = -EFAULT;
914                         goto pre_mutex_err;
915                 }
916         }
917
918         ret = i915_mutex_lock_interruptible(dev);
919         if (ret)
920                 goto pre_mutex_err;
921
922         if (dev_priv->mm.suspended) {
923                 mutex_unlock(&dev->struct_mutex);
924                 ret = -EBUSY;
925                 goto pre_mutex_err;
926         }
927
928         eb = eb_create(args->buffer_count);
929         if (eb == NULL) {
930                 mutex_unlock(&dev->struct_mutex);
931                 ret = -ENOMEM;
932                 goto pre_mutex_err;
933         }
934
935         /* Look up object handles */
936         INIT_LIST_HEAD(&objects);
937         for (i = 0; i < args->buffer_count; i++) {
938                 struct drm_i915_gem_object *obj;
939
940                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
941                                                         exec[i].handle));
942                 if (&obj->base == NULL) {
943                         DRM_DEBUG("Invalid object handle %d at index %d\n",
944                                    exec[i].handle, i);
945                         /* prevent error path from reading uninitialized data */
946                         ret = -ENOENT;
947                         goto err;
948                 }
949
950                 if (!list_empty(&obj->exec_list)) {
951                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
952                                    obj, exec[i].handle, i);
953                         ret = -EINVAL;
954                         goto err;
955                 }
956
957                 list_add_tail(&obj->exec_list, &objects);
958                 obj->exec_handle = exec[i].handle;
959                 obj->exec_entry = &exec[i];
960                 eb_add_object(eb, obj);
961         }
962
963         /* take note of the batch buffer before we might reorder the lists */
964         batch_obj = list_entry(objects.prev,
965                                struct drm_i915_gem_object,
966                                exec_list);
967
968         /* Move the objects en-masse into the GTT, evicting if necessary. */
969         ret = i915_gem_execbuffer_reserve(ring, file, &objects);
970         if (ret)
971                 goto err;
972
973         /* The objects are in their final locations, apply the relocations. */
974         ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
975         if (ret) {
976                 if (ret == -EFAULT) {
977                         ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
978                                                                 &objects, eb,
979                                                                 exec,
980                                                                 args->buffer_count);
981                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
982                 }
983                 if (ret)
984                         goto err;
985         }
986
987         /* Set the pending read domains for the batch buffer to COMMAND */
988         if (batch_obj->base.pending_write_domain) {
989                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
990                 ret = -EINVAL;
991                 goto err;
992         }
993         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
994
995         /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
996          * batch" bit. Hence we need to pin secure batches into the global gtt.
997          * hsw should have this fixed, but let's be paranoid and do it
998          * unconditionally for now. */
999         if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
1000                 i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
1001
1002         ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1003         if (ret)
1004                 goto err;
1005
1006         seqno = i915_gem_next_request_seqno(ring);
1007         for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
1008                 if (seqno < ring->sync_seqno[i]) {
1009                         /* The GPU cannot handle its semaphore value wrapping,
1010                          * so every billion or so execbuffers, we need to stall
1011                          * the GPU in order to reset the counters.
1012                          */
1013                         ret = i915_gpu_idle(dev);
1014                         if (ret)
1015                                 goto err;
1016                         i915_gem_retire_requests(dev);
1017
1018                         BUG_ON(ring->sync_seqno[i]);
1019                 }
1020         }
1021
1022         ret = i915_switch_context(ring, file, ctx_id);
1023         if (ret)
1024                 goto err;
1025
1026         if (ring == &dev_priv->ring[RCS] &&
1027             mode != dev_priv->relative_constants_mode) {
1028                 ret = intel_ring_begin(ring, 4);
1029                 if (ret)
1030                         goto err;
1031
1032                 intel_ring_emit(ring, MI_NOOP);
1033                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1034                 intel_ring_emit(ring, INSTPM);
1035                 intel_ring_emit(ring, mask << 16 | mode);
1036                 intel_ring_advance(ring);
1037
1038                 dev_priv->relative_constants_mode = mode;
1039         }
1040
1041         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1042                 ret = i915_reset_gen7_sol_offsets(dev, ring);
1043                 if (ret)
1044                         goto err;
1045         }
1046
1047         trace_i915_gem_ring_dispatch(ring, seqno, flags);
1048
1049         exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1050         exec_len = args->batch_len;
1051         if (cliprects) {
1052                 for (i = 0; i < args->num_cliprects; i++) {
1053                         ret = i915_emit_box(dev, &cliprects[i],
1054                                             args->DR1, args->DR4);
1055                         if (ret)
1056                                 goto err;
1057
1058                         ret = ring->dispatch_execbuffer(ring,
1059                                                         exec_start, exec_len,
1060                                                         flags);
1061                         if (ret)
1062                                 goto err;
1063                 }
1064         } else {
1065                 ret = ring->dispatch_execbuffer(ring,
1066                                                 exec_start, exec_len,
1067                                                 flags);
1068                 if (ret)
1069                         goto err;
1070         }
1071
1072         i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1073         i915_gem_execbuffer_retire_commands(dev, file, ring);
1074
1075 err:
1076         eb_destroy(eb);
1077         while (!list_empty(&objects)) {
1078                 struct drm_i915_gem_object *obj;
1079
1080                 obj = list_first_entry(&objects,
1081                                        struct drm_i915_gem_object,
1082                                        exec_list);
1083                 list_del_init(&obj->exec_list);
1084                 drm_gem_object_unreference(&obj->base);
1085         }
1086
1087         mutex_unlock(&dev->struct_mutex);
1088
1089 pre_mutex_err:
1090         kfree(cliprects);
1091         return ret;
1092 }
1093
1094 /*
1095  * Legacy execbuffer just creates an exec2 list from the original exec object
1096  * list array and passes it to the real function.
1097  */
1098 int
1099 i915_gem_execbuffer(struct drm_device *dev, void *data,
1100                     struct drm_file *file)
1101 {
1102         struct drm_i915_gem_execbuffer *args = data;
1103         struct drm_i915_gem_execbuffer2 exec2;
1104         struct drm_i915_gem_exec_object *exec_list = NULL;
1105         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1106         int ret, i;
1107
1108         if (args->buffer_count < 1) {
1109                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1110                 return -EINVAL;
1111         }
1112
1113         /* Copy in the exec list from userland */
1114         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1115         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1116         if (exec_list == NULL || exec2_list == NULL) {
1117                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1118                           args->buffer_count);
1119                 drm_free_large(exec_list);
1120                 drm_free_large(exec2_list);
1121                 return -ENOMEM;
1122         }
1123         ret = copy_from_user(exec_list,
1124                              (void __user *)(uintptr_t)args->buffers_ptr,
1125                              sizeof(*exec_list) * args->buffer_count);
1126         if (ret != 0) {
1127                 DRM_DEBUG("copy %d exec entries failed %d\n",
1128                           args->buffer_count, ret);
1129                 drm_free_large(exec_list);
1130                 drm_free_large(exec2_list);
1131                 return -EFAULT;
1132         }
1133
1134         for (i = 0; i < args->buffer_count; i++) {
1135                 exec2_list[i].handle = exec_list[i].handle;
1136                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1137                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1138                 exec2_list[i].alignment = exec_list[i].alignment;
1139                 exec2_list[i].offset = exec_list[i].offset;
1140                 if (INTEL_INFO(dev)->gen < 4)
1141                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1142                 else
1143                         exec2_list[i].flags = 0;
1144         }
1145
1146         exec2.buffers_ptr = args->buffers_ptr;
1147         exec2.buffer_count = args->buffer_count;
1148         exec2.batch_start_offset = args->batch_start_offset;
1149         exec2.batch_len = args->batch_len;
1150         exec2.DR1 = args->DR1;
1151         exec2.DR4 = args->DR4;
1152         exec2.num_cliprects = args->num_cliprects;
1153         exec2.cliprects_ptr = args->cliprects_ptr;
1154         exec2.flags = I915_EXEC_RENDER;
1155         i915_execbuffer2_set_context_id(exec2, 0);
1156
1157         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1158         if (!ret) {
1159                 /* Copy the new buffer offsets back to the user's exec list. */
1160                 for (i = 0; i < args->buffer_count; i++)
1161                         exec_list[i].offset = exec2_list[i].offset;
1162                 /* ... and back out to userspace */
1163                 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1164                                    exec_list,
1165                                    sizeof(*exec_list) * args->buffer_count);
1166                 if (ret) {
1167                         ret = -EFAULT;
1168                         DRM_DEBUG("failed to copy %d exec entries "
1169                                   "back to user (%d)\n",
1170                                   args->buffer_count, ret);
1171                 }
1172         }
1173
1174         drm_free_large(exec_list);
1175         drm_free_large(exec2_list);
1176         return ret;
1177 }
1178
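/*
 * execbuffer2 ioctl: copy the exec object list in from userspace, run the
 * common i915_gem_do_execbuffer() path and copy the final offsets back out.
 */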
1179 int
1180 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1181                      struct drm_file *file)
1182 {
1183         struct drm_i915_gem_execbuffer2 *args = data;
1184         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1185         int ret;
1186
1187         if (args->buffer_count < 1 ||
1188             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1189                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1190                 return -EINVAL;
1191         }
1192
1193         exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1194                              GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1195         if (exec2_list == NULL)
1196                 exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1197                                            args->buffer_count);
1198         if (exec2_list == NULL) {
1199                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1200                           args->buffer_count);
1201                 return -ENOMEM;
1202         }
1203         ret = copy_from_user(exec2_list,
1204                              (struct drm_i915_relocation_entry __user *)
1205                              (uintptr_t) args->buffers_ptr,
1206                              sizeof(*exec2_list) * args->buffer_count);
1207         if (ret != 0) {
1208                 DRM_DEBUG("copy %d exec entries failed %d\n",
1209                           args->buffer_count, ret);
1210                 drm_free_large(exec2_list);
1211                 return -EFAULT;
1212         }
1213
1214         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1215         if (!ret) {
1216                 /* Copy the new buffer offsets back to the user's exec list. */
1217                 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1218                                    exec2_list,
1219                                    sizeof(*exec2_list) * args->buffer_count);
1220                 if (ret) {
1221                         ret = -EFAULT;
1222                         DRM_DEBUG("failed to copy %d exec entries "
1223                                   "back to user (%d)\n",
1224                                   args->buffer_count, ret);
1225                 }
1226         }
1227
1228         drm_free_large(exec2_list);
1229         return ret;
1230 }