drm/i915: Rename dev_priv->gtt to dev_priv->ggtt
[linux-2.6-block.git] / drivers / gpu / drm / i915 / intel_ringbuffer.c
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29
30 #include <linux/log2.h>
31 #include <drm/drmP.h>
32 #include "i915_drv.h"
33 #include <drm/i915_drm.h>
34 #include "i915_trace.h"
35 #include "intel_drv.h"
36
37 int __intel_ring_space(int head, int tail, int size)
38 {
39         int space = head - tail;
40         if (space <= 0)
41                 space += size;
42         return space - I915_RING_FREE_SPACE;
43 }
44
45 void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
46 {
47         if (ringbuf->last_retired_head != -1) {
48                 ringbuf->head = ringbuf->last_retired_head;
49                 ringbuf->last_retired_head = -1;
50         }
51
52         ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
53                                             ringbuf->tail, ringbuf->size);
54 }
55
56 int intel_ring_space(struct intel_ringbuffer *ringbuf)
57 {
58         intel_ring_update_space(ringbuf);
59         return ringbuf->space;
60 }
61
62 bool intel_engine_stopped(struct intel_engine_cs *engine)
63 {
64         struct drm_i915_private *dev_priv = engine->dev->dev_private;
65         return dev_priv->gpu_error.stop_rings & intel_engine_flag(engine);
66 }
67
68 static void __intel_ring_advance(struct intel_engine_cs *engine)
69 {
70         struct intel_ringbuffer *ringbuf = engine->buffer;
71         ringbuf->tail &= ringbuf->size - 1;
72         if (intel_engine_stopped(engine))
73                 return;
74         engine->write_tail(engine, ringbuf->tail);
75 }
76
77 static int
78 gen2_render_ring_flush(struct drm_i915_gem_request *req,
79                        u32      invalidate_domains,
80                        u32      flush_domains)
81 {
82         struct intel_engine_cs *engine = req->engine;
83         u32 cmd;
84         int ret;
85
86         cmd = MI_FLUSH;
87         if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
88                 cmd |= MI_NO_WRITE_FLUSH;
89
90         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
91                 cmd |= MI_READ_FLUSH;
92
93         ret = intel_ring_begin(req, 2);
94         if (ret)
95                 return ret;
96
97         intel_ring_emit(engine, cmd);
98         intel_ring_emit(engine, MI_NOOP);
99         intel_ring_advance(engine);
100
101         return 0;
102 }
103
104 static int
105 gen4_render_ring_flush(struct drm_i915_gem_request *req,
106                        u32      invalidate_domains,
107                        u32      flush_domains)
108 {
109         struct intel_engine_cs *engine = req->engine;
110         struct drm_device *dev = engine->dev;
111         u32 cmd;
112         int ret;
113
114         /*
115          * read/write caches:
116          *
117          * I915_GEM_DOMAIN_RENDER is always invalidated, but is
118          * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
119          * also flushed at 2d versus 3d pipeline switches.
120          *
121          * read-only caches:
122          *
123          * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
124          * MI_READ_FLUSH is set, and is always flushed on 965.
125          *
126          * I915_GEM_DOMAIN_COMMAND may not exist?
127          *
128          * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
129          * invalidated when MI_EXE_FLUSH is set.
130          *
131          * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
132          * invalidated with every MI_FLUSH.
133          *
134          * TLBs:
135          *
136          * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
137          * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
138          * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
139          * are flushed at any MI_FLUSH.
140          */
141
142         cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
143         if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
144                 cmd &= ~MI_NO_WRITE_FLUSH;
145         if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
146                 cmd |= MI_EXE_FLUSH;
147
148         if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
149             (IS_G4X(dev) || IS_GEN5(dev)))
150                 cmd |= MI_INVALIDATE_ISP;
151
152         ret = intel_ring_begin(req, 2);
153         if (ret)
154                 return ret;
155
156         intel_ring_emit(engine, cmd);
157         intel_ring_emit(engine, MI_NOOP);
158         intel_ring_advance(engine);
159
160         return 0;
161 }
162
163 /**
164  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
165  * implementing two workarounds on gen6.  From section 1.4.7.1
166  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
167  *
168  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
169  * produced by non-pipelined state commands), software needs to first
170  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
171  * 0.
172  *
173  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
174  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
175  *
176  * And the workaround for these two requires this workaround first:
177  *
178  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
179  * BEFORE the pipe-control with a post-sync op and no write-cache
180  * flushes.
181  *
182  * And this last workaround is tricky because of the requirements on
183  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
184  * volume 2 part 1:
185  *
186  *     "1 of the following must also be set:
187  *      - Render Target Cache Flush Enable ([12] of DW1)
188  *      - Depth Cache Flush Enable ([0] of DW1)
189  *      - Stall at Pixel Scoreboard ([1] of DW1)
190  *      - Depth Stall ([13] of DW1)
191  *      - Post-Sync Operation ([13] of DW1)
192  *      - Notify Enable ([8] of DW1)"
193  *
194  * The cache flushes require the workaround flush that triggered this
195  * one, so we can't use it.  Depth stall would trigger the same.
196  * Post-sync nonzero is what triggered this second workaround, so we
197  * can't use that one either.  Notify enable is IRQs, which aren't
198  * really our business.  That leaves only stall at scoreboard.
199  */
200 static int
201 intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
202 {
203         struct intel_engine_cs *engine = req->engine;
204         u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
205         int ret;
206
207         ret = intel_ring_begin(req, 6);
208         if (ret)
209                 return ret;
210
211         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5));
212         intel_ring_emit(engine, PIPE_CONTROL_CS_STALL |
213                         PIPE_CONTROL_STALL_AT_SCOREBOARD);
214         intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
215         intel_ring_emit(engine, 0); /* low dword */
216         intel_ring_emit(engine, 0); /* high dword */
217         intel_ring_emit(engine, MI_NOOP);
218         intel_ring_advance(engine);
219
220         ret = intel_ring_begin(req, 6);
221         if (ret)
222                 return ret;
223
224         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(5));
225         intel_ring_emit(engine, PIPE_CONTROL_QW_WRITE);
226         intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
227         intel_ring_emit(engine, 0);
228         intel_ring_emit(engine, 0);
229         intel_ring_emit(engine, MI_NOOP);
230         intel_ring_advance(engine);
231
232         return 0;
233 }
234
235 static int
236 gen6_render_ring_flush(struct drm_i915_gem_request *req,
237                        u32 invalidate_domains, u32 flush_domains)
238 {
239         struct intel_engine_cs *engine = req->engine;
240         u32 flags = 0;
241         u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
242         int ret;
243
244         /* Force SNB workarounds for PIPE_CONTROL flushes */
245         ret = intel_emit_post_sync_nonzero_flush(req);
246         if (ret)
247                 return ret;
248
249         /* Just flush everything.  Experiments have shown that reducing the
250          * number of bits based on the write domains has little performance
251          * impact.
252          */
253         if (flush_domains) {
254                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
255                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
256                 /*
257                  * Ensure that any following seqno writes only happen
258                  * when the render cache is indeed flushed.
259                  */
260                 flags |= PIPE_CONTROL_CS_STALL;
261         }
262         if (invalidate_domains) {
263                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
264                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
265                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
266                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
267                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
268                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
269                 /*
270                  * TLB invalidate requires a post-sync write.
271                  */
272                 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
273         }
274
275         ret = intel_ring_begin(req, 4);
276         if (ret)
277                 return ret;
278
279         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4));
280         intel_ring_emit(engine, flags);
281         intel_ring_emit(engine, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
282         intel_ring_emit(engine, 0);
283         intel_ring_advance(engine);
284
285         return 0;
286 }
287
288 static int
289 gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
290 {
291         struct intel_engine_cs *engine = req->engine;
292         int ret;
293
294         ret = intel_ring_begin(req, 4);
295         if (ret)
296                 return ret;
297
298         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4));
299         intel_ring_emit(engine, PIPE_CONTROL_CS_STALL |
300                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
301         intel_ring_emit(engine, 0);
302         intel_ring_emit(engine, 0);
303         intel_ring_advance(engine);
304
305         return 0;
306 }
307
308 static int
309 gen7_render_ring_flush(struct drm_i915_gem_request *req,
310                        u32 invalidate_domains, u32 flush_domains)
311 {
312         struct intel_engine_cs *engine = req->engine;
313         u32 flags = 0;
314         u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
315         int ret;
316
317         /*
318          * Ensure that any following seqno writes only happen when the render
319          * cache is indeed flushed.
320          *
321          * Workaround: 4th PIPE_CONTROL command (except the ones with only
322          * read-cache invalidate bits set) must have the CS_STALL bit set. We
323          * don't try to be clever and just set it unconditionally.
324          */
325         flags |= PIPE_CONTROL_CS_STALL;
326
327         /* Just flush everything.  Experiments have shown that reducing the
328          * number of bits based on the write domains has little performance
329          * impact.
330          */
331         if (flush_domains) {
332                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
333                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
334                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
335                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
336         }
337         if (invalidate_domains) {
338                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
339                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
340                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
341                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
342                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
343                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
344                 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
345                 /*
346                  * TLB invalidate requires a post-sync write.
347                  */
348                 flags |= PIPE_CONTROL_QW_WRITE;
349                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
350
351                 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
352
353                 /* Workaround: we must issue a pipe_control with CS-stall bit
354                  * set before a pipe_control command that has the state cache
355                  * invalidate bit set. */
356                 gen7_render_ring_cs_stall_wa(req);
357         }
358
359         ret = intel_ring_begin(req, 4);
360         if (ret)
361                 return ret;
362
363         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(4));
364         intel_ring_emit(engine, flags);
365         intel_ring_emit(engine, scratch_addr);
366         intel_ring_emit(engine, 0);
367         intel_ring_advance(engine);
368
369         return 0;
370 }
371
372 static int
373 gen8_emit_pipe_control(struct drm_i915_gem_request *req,
374                        u32 flags, u32 scratch_addr)
375 {
376         struct intel_engine_cs *engine = req->engine;
377         int ret;
378
379         ret = intel_ring_begin(req, 6);
380         if (ret)
381                 return ret;
382
383         intel_ring_emit(engine, GFX_OP_PIPE_CONTROL(6));
384         intel_ring_emit(engine, flags);
385         intel_ring_emit(engine, scratch_addr);
386         intel_ring_emit(engine, 0);
387         intel_ring_emit(engine, 0);
388         intel_ring_emit(engine, 0);
389         intel_ring_advance(engine);
390
391         return 0;
392 }
393
394 static int
395 gen8_render_ring_flush(struct drm_i915_gem_request *req,
396                        u32 invalidate_domains, u32 flush_domains)
397 {
398         u32 flags = 0;
399         u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
400         int ret;
401
402         flags |= PIPE_CONTROL_CS_STALL;
403
404         if (flush_domains) {
405                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
406                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
407                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
408                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
409         }
410         if (invalidate_domains) {
411                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
412                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
413                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
414                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
415                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
416                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
417                 flags |= PIPE_CONTROL_QW_WRITE;
418                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
419
420                 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
421                 ret = gen8_emit_pipe_control(req,
422                                              PIPE_CONTROL_CS_STALL |
423                                              PIPE_CONTROL_STALL_AT_SCOREBOARD,
424                                              0);
425                 if (ret)
426                         return ret;
427         }
428
429         return gen8_emit_pipe_control(req, flags, scratch_addr);
430 }
431
432 static void ring_write_tail(struct intel_engine_cs *engine,
433                             u32 value)
434 {
435         struct drm_i915_private *dev_priv = engine->dev->dev_private;
436         I915_WRITE_TAIL(engine, value);
437 }
438
439 u64 intel_ring_get_active_head(struct intel_engine_cs *engine)
440 {
441         struct drm_i915_private *dev_priv = engine->dev->dev_private;
442         u64 acthd;
443
444         if (INTEL_INFO(engine->dev)->gen >= 8)
445                 acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
446                                          RING_ACTHD_UDW(engine->mmio_base));
447         else if (INTEL_INFO(engine->dev)->gen >= 4)
448                 acthd = I915_READ(RING_ACTHD(engine->mmio_base));
449         else
450                 acthd = I915_READ(ACTHD);
451
452         return acthd;
453 }
454
455 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
456 {
457         struct drm_i915_private *dev_priv = engine->dev->dev_private;
458         u32 addr;
459
460         addr = dev_priv->status_page_dmah->busaddr;
461         if (INTEL_INFO(engine->dev)->gen >= 4)
462                 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
463         I915_WRITE(HWS_PGA, addr);
464 }
465
466 static void intel_ring_setup_status_page(struct intel_engine_cs *engine)
467 {
468         struct drm_device *dev = engine->dev;
469         struct drm_i915_private *dev_priv = engine->dev->dev_private;
470         i915_reg_t mmio;
471
472         /* The ring status page addresses are no longer next to the rest of
473          * the ring registers as of gen7.
474          */
475         if (IS_GEN7(dev)) {
476                 switch (engine->id) {
477                 case RCS:
478                         mmio = RENDER_HWS_PGA_GEN7;
479                         break;
480                 case BCS:
481                         mmio = BLT_HWS_PGA_GEN7;
482                         break;
483                 /*
484                  * VCS2 actually doesn't exist on Gen7. Only shut up
485                  * gcc switch check warning
486                  */
487                 case VCS2:
488                 case VCS:
489                         mmio = BSD_HWS_PGA_GEN7;
490                         break;
491                 case VECS:
492                         mmio = VEBOX_HWS_PGA_GEN7;
493                         break;
494                 }
495         } else if (IS_GEN6(engine->dev)) {
496                 mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
497         } else {
498                 /* XXX: gen8 returns to sanity */
499                 mmio = RING_HWS_PGA(engine->mmio_base);
500         }
501
502         I915_WRITE(mmio, (u32)engine->status_page.gfx_addr);
503         POSTING_READ(mmio);
504
505         /*
506          * Flush the TLB for this page
507          *
508          * FIXME: These two bits have disappeared on gen8, so a question
509          * arises: do we still need this and if so how should we go about
510          * invalidating the TLB?
511          */
512         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
513                 i915_reg_t reg = RING_INSTPM(engine->mmio_base);
514
515                 /* ring should be idle before issuing a sync flush*/
516                 WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
517
518                 I915_WRITE(reg,
519                            _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
520                                               INSTPM_SYNC_FLUSH));
521                 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
522                              1000))
523                         DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
524                                   engine->name);
525         }
526 }
527
528 static bool stop_ring(struct intel_engine_cs *engine)
529 {
530         struct drm_i915_private *dev_priv = to_i915(engine->dev);
531
532         if (!IS_GEN2(engine->dev)) {
533                 I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
534                 if (wait_for((I915_READ_MODE(engine) & MODE_IDLE) != 0, 1000)) {
535                         DRM_ERROR("%s : timed out trying to stop ring\n",
536                                   engine->name);
537                         /* Sometimes we observe that the idle flag is not
538                          * set even though the ring is empty. So double
539                          * check before giving up.
540                          */
541                         if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
542                                 return false;
543                 }
544         }
545
546         I915_WRITE_CTL(engine, 0);
547         I915_WRITE_HEAD(engine, 0);
548         engine->write_tail(engine, 0);
549
550         if (!IS_GEN2(engine->dev)) {
551                 (void)I915_READ_CTL(engine);
552                 I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
553         }
554
555         return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
556 }
557
558 static int init_ring_common(struct intel_engine_cs *engine)
559 {
560         struct drm_device *dev = engine->dev;
561         struct drm_i915_private *dev_priv = dev->dev_private;
562         struct intel_ringbuffer *ringbuf = engine->buffer;
563         struct drm_i915_gem_object *obj = ringbuf->obj;
564         int ret = 0;
565
566         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
567
568         if (!stop_ring(engine)) {
569                 /* G45 ring initialization often fails to reset head to zero */
570                 DRM_DEBUG_KMS("%s head not reset to zero "
571                               "ctl %08x head %08x tail %08x start %08x\n",
572                               engine->name,
573                               I915_READ_CTL(engine),
574                               I915_READ_HEAD(engine),
575                               I915_READ_TAIL(engine),
576                               I915_READ_START(engine));
577
578                 if (!stop_ring(engine)) {
579                         DRM_ERROR("failed to set %s head to zero "
580                                   "ctl %08x head %08x tail %08x start %08x\n",
581                                   engine->name,
582                                   I915_READ_CTL(engine),
583                                   I915_READ_HEAD(engine),
584                                   I915_READ_TAIL(engine),
585                                   I915_READ_START(engine));
586                         ret = -EIO;
587                         goto out;
588                 }
589         }
590
591         if (I915_NEED_GFX_HWS(dev))
592                 intel_ring_setup_status_page(engine);
593         else
594                 ring_setup_phys_status_page(engine);
595
596         /* Enforce ordering by reading HEAD register back */
597         I915_READ_HEAD(engine);
598
599         /* Initialize the ring. This must happen _after_ we've cleared the ring
600          * registers with the above sequence (the readback of the HEAD registers
601          * also enforces ordering), otherwise the hw might lose the new ring
602          * register values. */
603         I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj));
604
605         /* WaClearRingBufHeadRegAtInit:ctg,elk */
606         if (I915_READ_HEAD(engine))
607                 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
608                           engine->name, I915_READ_HEAD(engine));
609         I915_WRITE_HEAD(engine, 0);
610         (void)I915_READ_HEAD(engine);
611
612         I915_WRITE_CTL(engine,
613                         ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
614                         | RING_VALID);
615
616         /* If the head is still not zero, the ring is dead */
617         if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
618                      I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
619                      (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
620                 DRM_ERROR("%s initialization failed "
621                           "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
622                           engine->name,
623                           I915_READ_CTL(engine),
624                           I915_READ_CTL(engine) & RING_VALID,
625                           I915_READ_HEAD(engine), I915_READ_TAIL(engine),
626                           I915_READ_START(engine),
627                           (unsigned long)i915_gem_obj_ggtt_offset(obj));
628                 ret = -EIO;
629                 goto out;
630         }
631
632         ringbuf->last_retired_head = -1;
633         ringbuf->head = I915_READ_HEAD(engine);
634         ringbuf->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
635         intel_ring_update_space(ringbuf);
636
637         memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
638
639 out:
640         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
641
642         return ret;
643 }
644
645 void
646 intel_fini_pipe_control(struct intel_engine_cs *engine)
647 {
648         struct drm_device *dev = engine->dev;
649
650         if (engine->scratch.obj == NULL)
651                 return;
652
653         if (INTEL_INFO(dev)->gen >= 5) {
654                 kunmap(sg_page(engine->scratch.obj->pages->sgl));
655                 i915_gem_object_ggtt_unpin(engine->scratch.obj);
656         }
657
658         drm_gem_object_unreference(&engine->scratch.obj->base);
659         engine->scratch.obj = NULL;
660 }
661
662 int
663 intel_init_pipe_control(struct intel_engine_cs *engine)
664 {
665         int ret;
666
667         WARN_ON(engine->scratch.obj);
668
669         engine->scratch.obj = i915_gem_alloc_object(engine->dev, 4096);
670         if (engine->scratch.obj == NULL) {
671                 DRM_ERROR("Failed to allocate seqno page\n");
672                 ret = -ENOMEM;
673                 goto err;
674         }
675
676         ret = i915_gem_object_set_cache_level(engine->scratch.obj,
677                                               I915_CACHE_LLC);
678         if (ret)
679                 goto err_unref;
680
681         ret = i915_gem_obj_ggtt_pin(engine->scratch.obj, 4096, 0);
682         if (ret)
683                 goto err_unref;
684
685         engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(engine->scratch.obj);
686         engine->scratch.cpu_page = kmap(sg_page(engine->scratch.obj->pages->sgl));
687         if (engine->scratch.cpu_page == NULL) {
688                 ret = -ENOMEM;
689                 goto err_unpin;
690         }
691
692         DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
693                          engine->name, engine->scratch.gtt_offset);
694         return 0;
695
696 err_unpin:
697         i915_gem_object_ggtt_unpin(engine->scratch.obj);
698 err_unref:
699         drm_gem_object_unreference(&engine->scratch.obj->base);
700 err:
701         return ret;
702 }
703
704 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
705 {
706         int ret, i;
707         struct intel_engine_cs *engine = req->engine;
708         struct drm_device *dev = engine->dev;
709         struct drm_i915_private *dev_priv = dev->dev_private;
710         struct i915_workarounds *w = &dev_priv->workarounds;
711
712         if (w->count == 0)
713                 return 0;
714
715         engine->gpu_caches_dirty = true;
716         ret = intel_ring_flush_all_caches(req);
717         if (ret)
718                 return ret;
719
720         ret = intel_ring_begin(req, (w->count * 2 + 2));
721         if (ret)
722                 return ret;
723
724         intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(w->count));
725         for (i = 0; i < w->count; i++) {
726                 intel_ring_emit_reg(engine, w->reg[i].addr);
727                 intel_ring_emit(engine, w->reg[i].value);
728         }
729         intel_ring_emit(engine, MI_NOOP);
730
731         intel_ring_advance(engine);
732
733         engine->gpu_caches_dirty = true;
734         ret = intel_ring_flush_all_caches(req);
735         if (ret)
736                 return ret;
737
738         DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
739
740         return 0;
741 }
742
/*
 * Context init for the render engine: emit the workaround registers, then
 * the render state. Returns 0 on success or the first failing step's error.
 */
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int err = intel_ring_workarounds_emit(req);

	if (err)
		return err;

	return i915_gem_render_state_init(req);
}
757
758 static int wa_add(struct drm_i915_private *dev_priv,
759                   i915_reg_t addr,
760                   const u32 mask, const u32 val)
761 {
762         const u32 idx = dev_priv->workarounds.count;
763
764         if (WARN_ON(idx >= I915_MAX_WA_REGS))
765                 return -ENOSPC;
766
767         dev_priv->workarounds.reg[idx].addr = addr;
768         dev_priv->workarounds.reg[idx].value = val;
769         dev_priv->workarounds.reg[idx].mask = mask;
770
771         dev_priv->workarounds.count++;
772
773         return 0;
774 }
775
/*
 * Workaround-recording helpers. They all expand to WA_REG(), which expects a
 * `dev_priv` variable in the calling scope and makes the *calling function*
 * return wa_add()'s error code if the entry cannot be recorded.
 */
#define WA_REG(addr, mask, val) do { \
		const int wa_reg_err = wa_add(dev_priv, (addr), (mask), (val)); \
		if (wa_reg_err) \
			return wa_reg_err; \
	} while (0)

/* Masked-register variants: the value is built with the _MASKED_* helpers. */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

/* Read-modify variants: the value is derived from the register's current contents. */
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

/* Record a whole-register write (mask of all ones). */
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
795
796 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
797                                  i915_reg_t reg)
798 {
799         struct drm_i915_private *dev_priv = engine->dev->dev_private;
800         struct i915_workarounds *wa = &dev_priv->workarounds;
801         const uint32_t index = wa->hw_whitelist_count[engine->id];
802
803         if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
804                 return -EINVAL;
805
806         WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
807                  i915_mmio_reg_offset(reg));
808         wa->hw_whitelist_count[engine->id]++;
809
810         return 0;
811 }
812
813 static int gen8_init_workarounds(struct intel_engine_cs *engine)
814 {
815         struct drm_device *dev = engine->dev;
816         struct drm_i915_private *dev_priv = dev->dev_private;
817
818         WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
819
820         /* WaDisableAsyncFlipPerfMode:bdw,chv */
821         WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
822
823         /* WaDisablePartialInstShootdown:bdw,chv */
824         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
825                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
826
827         /* Use Force Non-Coherent whenever executing a 3D context. This is a
828          * workaround for for a possible hang in the unlikely event a TLB
829          * invalidation occurs during a PSD flush.
830          */
831         /* WaForceEnableNonCoherent:bdw,chv */
832         /* WaHdcDisableFetchWhenMasked:bdw,chv */
833         WA_SET_BIT_MASKED(HDC_CHICKEN0,
834                           HDC_DONOT_FETCH_MEM_WHEN_MASKED |
835                           HDC_FORCE_NON_COHERENT);
836
837         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
838          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
839          *  polygons in the same 8x4 pixel/sample area to be processed without
840          *  stalling waiting for the earlier ones to write to Hierarchical Z
841          *  buffer."
842          *
843          * This optimization is off by default for BDW and CHV; turn it on.
844          */
845         WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
846
847         /* Wa4x4STCOptimizationDisable:bdw,chv */
848         WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
849
850         /*
851          * BSpec recommends 8x4 when MSAA is used,
852          * however in practice 16x4 seems fastest.
853          *
854          * Note that PS/WM thread counts depend on the WIZ hashing
855          * disable bit, which we don't touch here, but it's good
856          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
857          */
858         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
859                             GEN6_WIZ_HASHING_MASK,
860                             GEN6_WIZ_HASHING_16x4);
861
862         return 0;
863 }
864
865 static int bdw_init_workarounds(struct intel_engine_cs *engine)
866 {
867         int ret;
868         struct drm_device *dev = engine->dev;
869         struct drm_i915_private *dev_priv = dev->dev_private;
870
871         ret = gen8_init_workarounds(engine);
872         if (ret)
873                 return ret;
874
875         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
876         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
877
878         /* WaDisableDopClockGating:bdw */
879         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
880                           DOP_CLOCK_GATING_DISABLE);
881
882         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
883                           GEN8_SAMPLER_POWER_BYPASS_DIS);
884
885         WA_SET_BIT_MASKED(HDC_CHICKEN0,
886                           /* WaForceContextSaveRestoreNonCoherent:bdw */
887                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
888                           /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
889                           (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
890
891         return 0;
892 }
893
894 static int chv_init_workarounds(struct intel_engine_cs *engine)
895 {
896         int ret;
897         struct drm_device *dev = engine->dev;
898         struct drm_i915_private *dev_priv = dev->dev_private;
899
900         ret = gen8_init_workarounds(engine);
901         if (ret)
902                 return ret;
903
904         /* WaDisableThreadStallDopClockGating:chv */
905         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
906
907         /* Improve HiZ throughput on CHV. */
908         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
909
910         return 0;
911 }
912
913 static int gen9_init_workarounds(struct intel_engine_cs *engine)
914 {
915         struct drm_device *dev = engine->dev;
916         struct drm_i915_private *dev_priv = dev->dev_private;
917         uint32_t tmp;
918         int ret;
919
920         /* WaEnableLbsSlaRetryTimerDecrement:skl */
921         I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
922                    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
923
924         /* WaDisableKillLogic:bxt,skl */
925         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
926                    ECOCHK_DIS_TLB);
927
928         /* WaClearFlowControlGpgpuContextSave:skl,bxt */
929         /* WaDisablePartialInstShootdown:skl,bxt */
930         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
931                           FLOW_CONTROL_ENABLE |
932                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
933
934         /* Syncing dependencies between camera and graphics:skl,bxt */
935         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
936                           GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
937
938         /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
939         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
940             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
941                 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
942                                   GEN9_DG_MIRROR_FIX_ENABLE);
943
944         /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
945         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
946             IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
947                 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
948                                   GEN9_RHWO_OPTIMIZATION_DISABLE);
949                 /*
950                  * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
951                  * but we do that in per ctx batchbuffer as there is an issue
952                  * with this register not getting restored on ctx restore
953                  */
954         }
955
956         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
957         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
958                 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
959                                   GEN9_ENABLE_YV12_BUGFIX);
960
961         /* Wa4x4STCOptimizationDisable:skl,bxt */
962         /* WaDisablePartialResolveInVc:skl,bxt */
963         WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
964                                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
965
966         /* WaCcsTlbPrefetchDisable:skl,bxt */
967         WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
968                           GEN9_CCS_TLB_PREFETCH_ENABLE);
969
970         /* WaDisableMaskBasedCammingInRCC:skl,bxt */
971         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
972             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
973                 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
974                                   PIXEL_MASK_CAMMING_DISABLE);
975
976         /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
977         tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
978         if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) ||
979             IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
980                 tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
981         WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
982
983         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
984         if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
985                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
986                                   GEN8_SAMPLER_POWER_BYPASS_DIS);
987
988         /* WaDisableSTUnitPowerOptimization:skl,bxt */
989         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
990
991         /* WaOCLCoherentLineFlush:skl,bxt */
992         I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
993                                     GEN8_LQSC_FLUSH_COHERENT_LINES));
994
995         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
996         ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
997         if (ret)
998                 return ret;
999
1000         /* WaAllowUMDToModifyHDCChicken1:skl,bxt */
1001         ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
1002         if (ret)
1003                 return ret;
1004
1005         return 0;
1006 }
1007
1008 static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
1009 {
1010         struct drm_device *dev = engine->dev;
1011         struct drm_i915_private *dev_priv = dev->dev_private;
1012         u8 vals[3] = { 0, 0, 0 };
1013         unsigned int i;
1014
1015         for (i = 0; i < 3; i++) {
1016                 u8 ss;
1017
1018                 /*
1019                  * Only consider slices where one, and only one, subslice has 7
1020                  * EUs
1021                  */
1022                 if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
1023                         continue;
1024
1025                 /*
1026                  * subslice_7eu[i] != 0 (because of the check above) and
1027                  * ss_max == 4 (maximum number of subslices possible per slice)
1028                  *
1029                  * ->    0 <= ss <= 3;
1030                  */
1031                 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1032                 vals[i] = 3 - ss;
1033         }
1034
1035         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1036                 return 0;
1037
1038         /* Tune IZ hashing. See intel_device_info_runtime_init() */
1039         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1040                             GEN9_IZ_HASHING_MASK(2) |
1041                             GEN9_IZ_HASHING_MASK(1) |
1042                             GEN9_IZ_HASHING_MASK(0),
1043                             GEN9_IZ_HASHING(2, vals[2]) |
1044                             GEN9_IZ_HASHING(1, vals[1]) |
1045                             GEN9_IZ_HASHING(0, vals[0]));
1046
1047         return 0;
1048 }
1049
1050 static int skl_init_workarounds(struct intel_engine_cs *engine)
1051 {
1052         int ret;
1053         struct drm_device *dev = engine->dev;
1054         struct drm_i915_private *dev_priv = dev->dev_private;
1055
1056         ret = gen9_init_workarounds(engine);
1057         if (ret)
1058                 return ret;
1059
1060         /*
1061          * Actual WA is to disable percontext preemption granularity control
1062          * until D0 which is the default case so this is equivalent to
1063          * !WaDisablePerCtxtPreemptionGranularityControl:skl
1064          */
1065         if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
1066                 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1067                            _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1068         }
1069
1070         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
1071                 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1072                 I915_WRITE(FF_SLICE_CS_CHICKEN2,
1073                            _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1074         }
1075
1076         /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1077          * involving this register should also be added to WA batch as required.
1078          */
1079         if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
1080                 /* WaDisableLSQCROPERFforOCL:skl */
1081                 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1082                            GEN8_LQSC_RO_PERF_DIS);
1083
1084         /* WaEnableGapsTsvCreditFix:skl */
1085         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
1086                 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1087                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
1088         }
1089
1090         /* WaDisablePowerCompilerClockGating:skl */
1091         if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
1092                 WA_SET_BIT_MASKED(HIZ_CHICKEN,
1093                                   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1094
1095         if (IS_SKL_REVID(dev, 0, SKL_REVID_F0)) {
1096                 /*
1097                  *Use Force Non-Coherent whenever executing a 3D context. This
1098                  * is a workaround for a possible hang in the unlikely event
1099                  * a TLB invalidation occurs during a PSD flush.
1100                  */
1101                 /* WaForceEnableNonCoherent:skl */
1102                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1103                                   HDC_FORCE_NON_COHERENT);
1104
1105                 /* WaDisableHDCInvalidation:skl */
1106                 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1107                            BDW_DISABLE_HDC_INVALIDATION);
1108         }
1109
1110         /* WaBarrierPerformanceFixDisable:skl */
1111         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
1112                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
1113                                   HDC_FENCE_DEST_SLM_DISABLE |
1114                                   HDC_BARRIER_PERFORMANCE_DISABLE);
1115
1116         /* WaDisableSbeCacheDispatchPortSharing:skl */
1117         if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
1118                 WA_SET_BIT_MASKED(
1119                         GEN7_HALF_SLICE_CHICKEN1,
1120                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1121
1122         /* WaDisableLSQCROPERFforOCL:skl */
1123         ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1124         if (ret)
1125                 return ret;
1126
1127         return skl_tune_iz_hashing(engine);
1128 }
1129
1130 static int bxt_init_workarounds(struct intel_engine_cs *engine)
1131 {
1132         int ret;
1133         struct drm_device *dev = engine->dev;
1134         struct drm_i915_private *dev_priv = dev->dev_private;
1135
1136         ret = gen9_init_workarounds(engine);
1137         if (ret)
1138                 return ret;
1139
1140         /* WaStoreMultiplePTEenable:bxt */
1141         /* This is a requirement according to Hardware specification */
1142         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
1143                 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1144
1145         /* WaSetClckGatingDisableMedia:bxt */
1146         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
1147                 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1148                                             ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1149         }
1150
1151         /* WaDisableThreadStallDopClockGating:bxt */
1152         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1153                           STALL_DOP_GATING_DISABLE);
1154
1155         /* WaDisableSbeCacheDispatchPortSharing:bxt */
1156         if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
1157                 WA_SET_BIT_MASKED(
1158                         GEN7_HALF_SLICE_CHICKEN1,
1159                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1160         }
1161
1162         /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1163         /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1164         /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1165         /* WaDisableLSQCROPERFforOCL:bxt */
1166         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
1167                 ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
1168                 if (ret)
1169                         return ret;
1170
1171                 ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1172                 if (ret)
1173                         return ret;
1174         }
1175
1176         return 0;
1177 }
1178
1179 int init_workarounds_ring(struct intel_engine_cs *engine)
1180 {
1181         struct drm_device *dev = engine->dev;
1182         struct drm_i915_private *dev_priv = dev->dev_private;
1183
1184         WARN_ON(engine->id != RCS);
1185
1186         dev_priv->workarounds.count = 0;
1187         dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
1188
1189         if (IS_BROADWELL(dev))
1190                 return bdw_init_workarounds(engine);
1191
1192         if (IS_CHERRYVIEW(dev))
1193                 return chv_init_workarounds(engine);
1194
1195         if (IS_SKYLAKE(dev))
1196                 return skl_init_workarounds(engine);
1197
1198         if (IS_BROXTON(dev))
1199                 return bxt_init_workarounds(engine);
1200
1201         return 0;
1202 }
1203
1204 static int init_render_ring(struct intel_engine_cs *engine)
1205 {
1206         struct drm_device *dev = engine->dev;
1207         struct drm_i915_private *dev_priv = dev->dev_private;
1208         int ret = init_ring_common(engine);
1209         if (ret)
1210                 return ret;
1211
1212         /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1213         if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
1214                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
1215
1216         /* We need to disable the AsyncFlip performance optimisations in order
1217          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1218          * programmed to '1' on all products.
1219          *
1220          * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1221          */
1222         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1223                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1224
1225         /* Required for the hardware to program scanline values for waiting */
1226         /* WaEnableFlushTlbInvalidationMode:snb */
1227         if (INTEL_INFO(dev)->gen == 6)
1228                 I915_WRITE(GFX_MODE,
1229                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
1230
1231         /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1232         if (IS_GEN7(dev))
1233                 I915_WRITE(GFX_MODE_GEN7,
1234                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1235                            _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
1236
1237         if (IS_GEN6(dev)) {
1238                 /* From the Sandybridge PRM, volume 1 part 3, page 24:
1239                  * "If this bit is set, STCunit will have LRA as replacement
1240                  *  policy. [...] This bit must be reset.  LRA replacement
1241                  *  policy is not supported."
1242                  */
1243                 I915_WRITE(CACHE_MODE_0,
1244                            _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
1245         }
1246
1247         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
1248                 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1249
1250         if (HAS_L3_DPF(dev))
1251                 I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev));
1252
1253         return init_workarounds_ring(engine);
1254 }
1255
1256 static void render_ring_cleanup(struct intel_engine_cs *engine)
1257 {
1258         struct drm_device *dev = engine->dev;
1259         struct drm_i915_private *dev_priv = dev->dev_private;
1260
1261         if (dev_priv->semaphore_obj) {
1262                 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1263                 drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1264                 dev_priv->semaphore_obj = NULL;
1265         }
1266
1267         intel_fini_pipe_control(engine);
1268 }
1269
1270 static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
1271                            unsigned int num_dwords)
1272 {
1273 #define MBOX_UPDATE_DWORDS 8
1274         struct intel_engine_cs *signaller = signaller_req->engine;
1275         struct drm_device *dev = signaller->dev;
1276         struct drm_i915_private *dev_priv = dev->dev_private;
1277         struct intel_engine_cs *waiter;
1278         int i, ret, num_rings;
1279
1280         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1281         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1282 #undef MBOX_UPDATE_DWORDS
1283
1284         ret = intel_ring_begin(signaller_req, num_dwords);
1285         if (ret)
1286                 return ret;
1287
1288         for_each_engine(waiter, dev_priv, i) {
1289                 u32 seqno;
1290                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1291                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1292                         continue;
1293
1294                 seqno = i915_gem_request_get_seqno(signaller_req);
1295                 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1296                 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1297                                            PIPE_CONTROL_QW_WRITE |
1298                                            PIPE_CONTROL_FLUSH_ENABLE);
1299                 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1300                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1301                 intel_ring_emit(signaller, seqno);
1302                 intel_ring_emit(signaller, 0);
1303                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1304                                            MI_SEMAPHORE_TARGET(waiter->id));
1305                 intel_ring_emit(signaller, 0);
1306         }
1307
1308         return 0;
1309 }
1310
1311 static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
1312                            unsigned int num_dwords)
1313 {
1314 #define MBOX_UPDATE_DWORDS 6
1315         struct intel_engine_cs *signaller = signaller_req->engine;
1316         struct drm_device *dev = signaller->dev;
1317         struct drm_i915_private *dev_priv = dev->dev_private;
1318         struct intel_engine_cs *waiter;
1319         int i, ret, num_rings;
1320
1321         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1322         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1323 #undef MBOX_UPDATE_DWORDS
1324
1325         ret = intel_ring_begin(signaller_req, num_dwords);
1326         if (ret)
1327                 return ret;
1328
1329         for_each_engine(waiter, dev_priv, i) {
1330                 u32 seqno;
1331                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1332                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1333                         continue;
1334
1335                 seqno = i915_gem_request_get_seqno(signaller_req);
1336                 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1337                                            MI_FLUSH_DW_OP_STOREDW);
1338                 intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1339                                            MI_FLUSH_DW_USE_GTT);
1340                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1341                 intel_ring_emit(signaller, seqno);
1342                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1343                                            MI_SEMAPHORE_TARGET(waiter->id));
1344                 intel_ring_emit(signaller, 0);
1345         }
1346
1347         return 0;
1348 }
1349
1350 static int gen6_signal(struct drm_i915_gem_request *signaller_req,
1351                        unsigned int num_dwords)
1352 {
1353         struct intel_engine_cs *signaller = signaller_req->engine;
1354         struct drm_device *dev = signaller->dev;
1355         struct drm_i915_private *dev_priv = dev->dev_private;
1356         struct intel_engine_cs *useless;
1357         int i, ret, num_rings;
1358
1359 #define MBOX_UPDATE_DWORDS 3
1360         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1361         num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1362 #undef MBOX_UPDATE_DWORDS
1363
1364         ret = intel_ring_begin(signaller_req, num_dwords);
1365         if (ret)
1366                 return ret;
1367
1368         for_each_engine(useless, dev_priv, i) {
1369                 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
1370
1371                 if (i915_mmio_reg_valid(mbox_reg)) {
1372                         u32 seqno = i915_gem_request_get_seqno(signaller_req);
1373
1374                         intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1375                         intel_ring_emit_reg(signaller, mbox_reg);
1376                         intel_ring_emit(signaller, seqno);
1377                 }
1378         }
1379
1380         /* If num_dwords was rounded, make sure the tail pointer is correct */
1381         if (num_rings % 2 == 0)
1382                 intel_ring_emit(signaller, MI_NOOP);
1383
1384         return 0;
1385 }
1386
1387 /**
1388  * gen6_add_request - Update the semaphore mailbox registers
1389  *
1390  * @request - request to write to the ring
1391  *
1392  * Update the mailbox registers in the *other* rings with the current seqno.
1393  * This acts like a signal in the canonical semaphore.
1394  */
1395 static int
1396 gen6_add_request(struct drm_i915_gem_request *req)
1397 {
1398         struct intel_engine_cs *engine = req->engine;
1399         int ret;
1400
1401         if (engine->semaphore.signal)
1402                 ret = engine->semaphore.signal(req, 4);
1403         else
1404                 ret = intel_ring_begin(req, 4);
1405
1406         if (ret)
1407                 return ret;
1408
1409         intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
1410         intel_ring_emit(engine,
1411                         I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1412         intel_ring_emit(engine, i915_gem_request_get_seqno(req));
1413         intel_ring_emit(engine, MI_USER_INTERRUPT);
1414         __intel_ring_advance(engine);
1415
1416         return 0;
1417 }
1418
1419 static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1420                                               u32 seqno)
1421 {
1422         struct drm_i915_private *dev_priv = dev->dev_private;
1423         return dev_priv->last_seqno < seqno;
1424 }
1425
1426 /**
1427  * intel_ring_sync - sync the waiter to the signaller on seqno
1428  *
1429  * @waiter - ring that is waiting
1430  * @signaller - ring which has, or will signal
1431  * @seqno - seqno which the waiter will block on
1432  */
1433
1434 static int
1435 gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
1436                struct intel_engine_cs *signaller,
1437                u32 seqno)
1438 {
1439         struct intel_engine_cs *waiter = waiter_req->engine;
1440         struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1441         int ret;
1442
1443         ret = intel_ring_begin(waiter_req, 4);
1444         if (ret)
1445                 return ret;
1446
1447         intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1448                                 MI_SEMAPHORE_GLOBAL_GTT |
1449                                 MI_SEMAPHORE_POLL |
1450                                 MI_SEMAPHORE_SAD_GTE_SDD);
1451         intel_ring_emit(waiter, seqno);
1452         intel_ring_emit(waiter,
1453                         lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1454         intel_ring_emit(waiter,
1455                         upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1456         intel_ring_advance(waiter);
1457         return 0;
1458 }
1459
1460 static int
1461 gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
1462                struct intel_engine_cs *signaller,
1463                u32 seqno)
1464 {
1465         struct intel_engine_cs *waiter = waiter_req->engine;
1466         u32 dw1 = MI_SEMAPHORE_MBOX |
1467                   MI_SEMAPHORE_COMPARE |
1468                   MI_SEMAPHORE_REGISTER;
1469         u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1470         int ret;
1471
1472         /* Throughout all of the GEM code, seqno passed implies our current
1473          * seqno is >= the last seqno executed. However for hardware the
1474          * comparison is strictly greater than.
1475          */
1476         seqno -= 1;
1477
1478         WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1479
1480         ret = intel_ring_begin(waiter_req, 4);
1481         if (ret)
1482                 return ret;
1483
1484         /* If seqno wrap happened, omit the wait with no-ops */
1485         if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
1486                 intel_ring_emit(waiter, dw1 | wait_mbox);
1487                 intel_ring_emit(waiter, seqno);
1488                 intel_ring_emit(waiter, 0);
1489                 intel_ring_emit(waiter, MI_NOOP);
1490         } else {
1491                 intel_ring_emit(waiter, MI_NOOP);
1492                 intel_ring_emit(waiter, MI_NOOP);
1493                 intel_ring_emit(waiter, MI_NOOP);
1494                 intel_ring_emit(waiter, MI_NOOP);
1495         }
1496         intel_ring_advance(waiter);
1497
1498         return 0;
1499 }
1500
/*
 * Emit a four-dword PIPE_CONTROL (qword write with depth stall) whose
 * address is addr__ in the global GTT, followed by two zero dwords.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__) \
do { \
	intel_ring_emit(ring__, \
			GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
			PIPE_CONTROL_DEPTH_STALL); \
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
	intel_ring_emit(ring__, 0); \
	intel_ring_emit(ring__, 0); \
} while (0)
1509
1510 static int
1511 pc_render_add_request(struct drm_i915_gem_request *req)
1512 {
1513         struct intel_engine_cs *engine = req->engine;
1514         u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1515         int ret;
1516
1517         /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1518          * incoherent with writes to memory, i.e. completely fubar,
1519          * so we need to use PIPE_NOTIFY instead.
1520          *
1521          * However, we also need to workaround the qword write
1522          * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1523          * memory before requesting an interrupt.
1524          */
1525         ret = intel_ring_begin(req, 32);
1526         if (ret)
1527                 return ret;
1528
1529         intel_ring_emit(engine,
1530                         GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1531                         PIPE_CONTROL_WRITE_FLUSH |
1532                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
1533         intel_ring_emit(engine,
1534                         engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1535         intel_ring_emit(engine, i915_gem_request_get_seqno(req));
1536         intel_ring_emit(engine, 0);
1537         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1538         scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1539         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1540         scratch_addr += 2 * CACHELINE_BYTES;
1541         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1542         scratch_addr += 2 * CACHELINE_BYTES;
1543         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1544         scratch_addr += 2 * CACHELINE_BYTES;
1545         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1546         scratch_addr += 2 * CACHELINE_BYTES;
1547         PIPE_CONTROL_FLUSH(engine, scratch_addr);
1548
1549         intel_ring_emit(engine,
1550                         GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1551                         PIPE_CONTROL_WRITE_FLUSH |
1552                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1553                         PIPE_CONTROL_NOTIFY);
1554         intel_ring_emit(engine,
1555                         engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1556         intel_ring_emit(engine, i915_gem_request_get_seqno(req));
1557         intel_ring_emit(engine, 0);
1558         __intel_ring_advance(engine);
1559
1560         return 0;
1561 }
1562
1563 static u32
1564 gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
1565 {
1566         /* Workaround to force correct ordering between irq and seqno writes on
1567          * ivb (and maybe also on snb) by reading from a CS register (like
1568          * ACTHD) before reading the status page. */
1569         if (!lazy_coherency) {
1570                 struct drm_i915_private *dev_priv = engine->dev->dev_private;
1571                 POSTING_READ(RING_ACTHD(engine->mmio_base));
1572         }
1573
1574         return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
1575 }
1576
1577 static u32
1578 ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
1579 {
1580         return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
1581 }
1582
1583 static void
1584 ring_set_seqno(struct intel_engine_cs *engine, u32 seqno)
1585 {
1586         intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
1587 }
1588
1589 static u32
1590 pc_render_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
1591 {
1592         return engine->scratch.cpu_page[0];
1593 }
1594
1595 static void
1596 pc_render_set_seqno(struct intel_engine_cs *engine, u32 seqno)
1597 {
1598         engine->scratch.cpu_page[0] = seqno;
1599 }
1600
1601 static bool
1602 gen5_ring_get_irq(struct intel_engine_cs *engine)
1603 {
1604         struct drm_device *dev = engine->dev;
1605         struct drm_i915_private *dev_priv = dev->dev_private;
1606         unsigned long flags;
1607
1608         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1609                 return false;
1610
1611         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1612         if (engine->irq_refcount++ == 0)
1613                 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
1614         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1615
1616         return true;
1617 }
1618
1619 static void
1620 gen5_ring_put_irq(struct intel_engine_cs *engine)
1621 {
1622         struct drm_device *dev = engine->dev;
1623         struct drm_i915_private *dev_priv = dev->dev_private;
1624         unsigned long flags;
1625
1626         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1627         if (--engine->irq_refcount == 0)
1628                 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
1629         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1630 }
1631
1632 static bool
1633 i9xx_ring_get_irq(struct intel_engine_cs *engine)
1634 {
1635         struct drm_device *dev = engine->dev;
1636         struct drm_i915_private *dev_priv = dev->dev_private;
1637         unsigned long flags;
1638
1639         if (!intel_irqs_enabled(dev_priv))
1640                 return false;
1641
1642         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1643         if (engine->irq_refcount++ == 0) {
1644                 dev_priv->irq_mask &= ~engine->irq_enable_mask;
1645                 I915_WRITE(IMR, dev_priv->irq_mask);
1646                 POSTING_READ(IMR);
1647         }
1648         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1649
1650         return true;
1651 }
1652
1653 static void
1654 i9xx_ring_put_irq(struct intel_engine_cs *engine)
1655 {
1656         struct drm_device *dev = engine->dev;
1657         struct drm_i915_private *dev_priv = dev->dev_private;
1658         unsigned long flags;
1659
1660         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1661         if (--engine->irq_refcount == 0) {
1662                 dev_priv->irq_mask |= engine->irq_enable_mask;
1663                 I915_WRITE(IMR, dev_priv->irq_mask);
1664                 POSTING_READ(IMR);
1665         }
1666         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1667 }
1668
1669 static bool
1670 i8xx_ring_get_irq(struct intel_engine_cs *engine)
1671 {
1672         struct drm_device *dev = engine->dev;
1673         struct drm_i915_private *dev_priv = dev->dev_private;
1674         unsigned long flags;
1675
1676         if (!intel_irqs_enabled(dev_priv))
1677                 return false;
1678
1679         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1680         if (engine->irq_refcount++ == 0) {
1681                 dev_priv->irq_mask &= ~engine->irq_enable_mask;
1682                 I915_WRITE16(IMR, dev_priv->irq_mask);
1683                 POSTING_READ16(IMR);
1684         }
1685         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1686
1687         return true;
1688 }
1689
1690 static void
1691 i8xx_ring_put_irq(struct intel_engine_cs *engine)
1692 {
1693         struct drm_device *dev = engine->dev;
1694         struct drm_i915_private *dev_priv = dev->dev_private;
1695         unsigned long flags;
1696
1697         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1698         if (--engine->irq_refcount == 0) {
1699                 dev_priv->irq_mask |= engine->irq_enable_mask;
1700                 I915_WRITE16(IMR, dev_priv->irq_mask);
1701                 POSTING_READ16(IMR);
1702         }
1703         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1704 }
1705
1706 static int
1707 bsd_ring_flush(struct drm_i915_gem_request *req,
1708                u32     invalidate_domains,
1709                u32     flush_domains)
1710 {
1711         struct intel_engine_cs *engine = req->engine;
1712         int ret;
1713
1714         ret = intel_ring_begin(req, 2);
1715         if (ret)
1716                 return ret;
1717
1718         intel_ring_emit(engine, MI_FLUSH);
1719         intel_ring_emit(engine, MI_NOOP);
1720         intel_ring_advance(engine);
1721         return 0;
1722 }
1723
1724 static int
1725 i9xx_add_request(struct drm_i915_gem_request *req)
1726 {
1727         struct intel_engine_cs *engine = req->engine;
1728         int ret;
1729
1730         ret = intel_ring_begin(req, 4);
1731         if (ret)
1732                 return ret;
1733
1734         intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
1735         intel_ring_emit(engine,
1736                         I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1737         intel_ring_emit(engine, i915_gem_request_get_seqno(req));
1738         intel_ring_emit(engine, MI_USER_INTERRUPT);
1739         __intel_ring_advance(engine);
1740
1741         return 0;
1742 }
1743
1744 static bool
1745 gen6_ring_get_irq(struct intel_engine_cs *engine)
1746 {
1747         struct drm_device *dev = engine->dev;
1748         struct drm_i915_private *dev_priv = dev->dev_private;
1749         unsigned long flags;
1750
1751         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1752                 return false;
1753
1754         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1755         if (engine->irq_refcount++ == 0) {
1756                 if (HAS_L3_DPF(dev) && engine->id == RCS)
1757                         I915_WRITE_IMR(engine,
1758                                        ~(engine->irq_enable_mask |
1759                                          GT_PARITY_ERROR(dev)));
1760                 else
1761                         I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
1762                 gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
1763         }
1764         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1765
1766         return true;
1767 }
1768
1769 static void
1770 gen6_ring_put_irq(struct intel_engine_cs *engine)
1771 {
1772         struct drm_device *dev = engine->dev;
1773         struct drm_i915_private *dev_priv = dev->dev_private;
1774         unsigned long flags;
1775
1776         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1777         if (--engine->irq_refcount == 0) {
1778                 if (HAS_L3_DPF(dev) && engine->id == RCS)
1779                         I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev));
1780                 else
1781                         I915_WRITE_IMR(engine, ~0);
1782                 gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
1783         }
1784         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1785 }
1786
1787 static bool
1788 hsw_vebox_get_irq(struct intel_engine_cs *engine)
1789 {
1790         struct drm_device *dev = engine->dev;
1791         struct drm_i915_private *dev_priv = dev->dev_private;
1792         unsigned long flags;
1793
1794         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1795                 return false;
1796
1797         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1798         if (engine->irq_refcount++ == 0) {
1799                 I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
1800                 gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask);
1801         }
1802         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1803
1804         return true;
1805 }
1806
1807 static void
1808 hsw_vebox_put_irq(struct intel_engine_cs *engine)
1809 {
1810         struct drm_device *dev = engine->dev;
1811         struct drm_i915_private *dev_priv = dev->dev_private;
1812         unsigned long flags;
1813
1814         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1815         if (--engine->irq_refcount == 0) {
1816                 I915_WRITE_IMR(engine, ~0);
1817                 gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask);
1818         }
1819         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1820 }
1821
1822 static bool
1823 gen8_ring_get_irq(struct intel_engine_cs *engine)
1824 {
1825         struct drm_device *dev = engine->dev;
1826         struct drm_i915_private *dev_priv = dev->dev_private;
1827         unsigned long flags;
1828
1829         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1830                 return false;
1831
1832         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1833         if (engine->irq_refcount++ == 0) {
1834                 if (HAS_L3_DPF(dev) && engine->id == RCS) {
1835                         I915_WRITE_IMR(engine,
1836                                        ~(engine->irq_enable_mask |
1837                                          GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1838                 } else {
1839                         I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
1840                 }
1841                 POSTING_READ(RING_IMR(engine->mmio_base));
1842         }
1843         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1844
1845         return true;
1846 }
1847
1848 static void
1849 gen8_ring_put_irq(struct intel_engine_cs *engine)
1850 {
1851         struct drm_device *dev = engine->dev;
1852         struct drm_i915_private *dev_priv = dev->dev_private;
1853         unsigned long flags;
1854
1855         spin_lock_irqsave(&dev_priv->irq_lock, flags);
1856         if (--engine->irq_refcount == 0) {
1857                 if (HAS_L3_DPF(dev) && engine->id == RCS) {
1858                         I915_WRITE_IMR(engine,
1859                                        ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1860                 } else {
1861                         I915_WRITE_IMR(engine, ~0);
1862                 }
1863                 POSTING_READ(RING_IMR(engine->mmio_base));
1864         }
1865         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1866 }
1867
1868 static int
1869 i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1870                          u64 offset, u32 length,
1871                          unsigned dispatch_flags)
1872 {
1873         struct intel_engine_cs *engine = req->engine;
1874         int ret;
1875
1876         ret = intel_ring_begin(req, 2);
1877         if (ret)
1878                 return ret;
1879
1880         intel_ring_emit(engine,
1881                         MI_BATCH_BUFFER_START |
1882                         MI_BATCH_GTT |
1883                         (dispatch_flags & I915_DISPATCH_SECURE ?
1884                          0 : MI_BATCH_NON_SECURE_I965));
1885         intel_ring_emit(engine, offset);
1886         intel_ring_advance(engine);
1887
1888         return 0;
1889 }
1890
1891 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
1892 #define I830_BATCH_LIMIT (256*1024)
1893 #define I830_TLB_ENTRIES (2)
1894 #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1895 static int
1896 i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1897                          u64 offset, u32 len,
1898                          unsigned dispatch_flags)
1899 {
1900         struct intel_engine_cs *engine = req->engine;
1901         u32 cs_offset = engine->scratch.gtt_offset;
1902         int ret;
1903
1904         ret = intel_ring_begin(req, 6);
1905         if (ret)
1906                 return ret;
1907
1908         /* Evict the invalid PTE TLBs */
1909         intel_ring_emit(engine, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1910         intel_ring_emit(engine, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1911         intel_ring_emit(engine, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1912         intel_ring_emit(engine, cs_offset);
1913         intel_ring_emit(engine, 0xdeadbeef);
1914         intel_ring_emit(engine, MI_NOOP);
1915         intel_ring_advance(engine);
1916
1917         if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1918                 if (len > I830_BATCH_LIMIT)
1919                         return -ENOSPC;
1920
1921                 ret = intel_ring_begin(req, 6 + 2);
1922                 if (ret)
1923                         return ret;
1924
1925                 /* Blit the batch (which has now all relocs applied) to the
1926                  * stable batch scratch bo area (so that the CS never
1927                  * stumbles over its tlb invalidation bug) ...
1928                  */
1929                 intel_ring_emit(engine, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1930                 intel_ring_emit(engine,
1931                                 BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
1932                 intel_ring_emit(engine, DIV_ROUND_UP(len, 4096) << 16 | 4096);
1933                 intel_ring_emit(engine, cs_offset);
1934                 intel_ring_emit(engine, 4096);
1935                 intel_ring_emit(engine, offset);
1936
1937                 intel_ring_emit(engine, MI_FLUSH);
1938                 intel_ring_emit(engine, MI_NOOP);
1939                 intel_ring_advance(engine);
1940
1941                 /* ... and execute it. */
1942                 offset = cs_offset;
1943         }
1944
1945         ret = intel_ring_begin(req, 2);
1946         if (ret)
1947                 return ret;
1948
1949         intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1950         intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1951                                           0 : MI_BATCH_NON_SECURE));
1952         intel_ring_advance(engine);
1953
1954         return 0;
1955 }
1956
1957 static int
1958 i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
1959                          u64 offset, u32 len,
1960                          unsigned dispatch_flags)
1961 {
1962         struct intel_engine_cs *engine = req->engine;
1963         int ret;
1964
1965         ret = intel_ring_begin(req, 2);
1966         if (ret)
1967                 return ret;
1968
1969         intel_ring_emit(engine, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1970         intel_ring_emit(engine, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1971                                           0 : MI_BATCH_NON_SECURE));
1972         intel_ring_advance(engine);
1973
1974         return 0;
1975 }
1976
1977 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
1978 {
1979         struct drm_i915_private *dev_priv = to_i915(engine->dev);
1980
1981         if (!dev_priv->status_page_dmah)
1982                 return;
1983
1984         drm_pci_free(engine->dev, dev_priv->status_page_dmah);
1985         engine->status_page.page_addr = NULL;
1986 }
1987
1988 static void cleanup_status_page(struct intel_engine_cs *engine)
1989 {
1990         struct drm_i915_gem_object *obj;
1991
1992         obj = engine->status_page.obj;
1993         if (obj == NULL)
1994                 return;
1995
1996         kunmap(sg_page(obj->pages->sgl));
1997         i915_gem_object_ggtt_unpin(obj);
1998         drm_gem_object_unreference(&obj->base);
1999         engine->status_page.obj = NULL;
2000 }
2001
2002 static int init_status_page(struct intel_engine_cs *engine)
2003 {
2004         struct drm_i915_gem_object *obj = engine->status_page.obj;
2005
2006         if (obj == NULL) {
2007                 unsigned flags;
2008                 int ret;
2009
2010                 obj = i915_gem_alloc_object(engine->dev, 4096);
2011                 if (obj == NULL) {
2012                         DRM_ERROR("Failed to allocate status page\n");
2013                         return -ENOMEM;
2014                 }
2015
2016                 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2017                 if (ret)
2018                         goto err_unref;
2019
2020                 flags = 0;
2021                 if (!HAS_LLC(engine->dev))
2022                         /* On g33, we cannot place HWS above 256MiB, so
2023                          * restrict its pinning to the low mappable arena.
2024                          * Though this restriction is not documented for
2025                          * gen4, gen5, or byt, they also behave similarly
2026                          * and hang if the HWS is placed at the top of the
2027                          * GTT. To generalise, it appears that all !llc
2028                          * platforms have issues with us placing the HWS
2029                          * above the mappable region (even though we never
2030                          * actualy map it).
2031                          */
2032                         flags |= PIN_MAPPABLE;
2033                 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
2034                 if (ret) {
2035 err_unref:
2036                         drm_gem_object_unreference(&obj->base);
2037                         return ret;
2038                 }
2039
2040                 engine->status_page.obj = obj;
2041         }
2042
2043         engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
2044         engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
2045         memset(engine->status_page.page_addr, 0, PAGE_SIZE);
2046
2047         DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
2048                         engine->name, engine->status_page.gfx_addr);
2049
2050         return 0;
2051 }
2052
2053 static int init_phys_status_page(struct intel_engine_cs *engine)
2054 {
2055         struct drm_i915_private *dev_priv = engine->dev->dev_private;
2056
2057         if (!dev_priv->status_page_dmah) {
2058                 dev_priv->status_page_dmah =
2059                         drm_pci_alloc(engine->dev, PAGE_SIZE, PAGE_SIZE);
2060                 if (!dev_priv->status_page_dmah)
2061                         return -ENOMEM;
2062         }
2063
2064         engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2065         memset(engine->status_page.page_addr, 0, PAGE_SIZE);
2066
2067         return 0;
2068 }
2069
2070 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2071 {
2072         if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
2073                 vunmap(ringbuf->virtual_start);
2074         else
2075                 iounmap(ringbuf->virtual_start);
2076         ringbuf->virtual_start = NULL;
2077         ringbuf->vma = NULL;
2078         i915_gem_object_ggtt_unpin(ringbuf->obj);
2079 }
2080
2081 static u32 *vmap_obj(struct drm_i915_gem_object *obj)
2082 {
2083         struct sg_page_iter sg_iter;
2084         struct page **pages;
2085         void *addr;
2086         int i;
2087
2088         pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
2089         if (pages == NULL)
2090                 return NULL;
2091
2092         i = 0;
2093         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
2094                 pages[i++] = sg_page_iter_page(&sg_iter);
2095
2096         addr = vmap(pages, i, 0, PAGE_KERNEL);
2097         drm_free_large(pages);
2098
2099         return addr;
2100 }
2101
2102 int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
2103                                      struct intel_ringbuffer *ringbuf)
2104 {
2105         struct drm_i915_private *dev_priv = to_i915(dev);
2106         struct drm_i915_gem_object *obj = ringbuf->obj;
2107         int ret;
2108
2109         if (HAS_LLC(dev_priv) && !obj->stolen) {
2110                 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
2111                 if (ret)
2112                         return ret;
2113
2114                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
2115                 if (ret) {
2116                         i915_gem_object_ggtt_unpin(obj);
2117                         return ret;
2118                 }
2119
2120                 ringbuf->virtual_start = vmap_obj(obj);
2121                 if (ringbuf->virtual_start == NULL) {
2122                         i915_gem_object_ggtt_unpin(obj);
2123                         return -ENOMEM;
2124                 }
2125         } else {
2126                 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
2127                 if (ret)
2128                         return ret;
2129
2130                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
2131                 if (ret) {
2132                         i915_gem_object_ggtt_unpin(obj);
2133                         return ret;
2134                 }
2135
2136                 /* Access through the GTT requires the device to be awake. */
2137                 assert_rpm_wakelock_held(dev_priv);
2138
2139                 ringbuf->virtual_start = ioremap_wc(dev_priv->ggtt.mappable_base +
2140                                                     i915_gem_obj_ggtt_offset(obj), ringbuf->size);
2141                 if (ringbuf->virtual_start == NULL) {
2142                         i915_gem_object_ggtt_unpin(obj);
2143                         return -EINVAL;
2144                 }
2145         }
2146
2147         ringbuf->vma = i915_gem_obj_to_ggtt(obj);
2148
2149         return 0;
2150 }
2151
2152 static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2153 {
2154         drm_gem_object_unreference(&ringbuf->obj->base);
2155         ringbuf->obj = NULL;
2156 }
2157
2158 static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2159                                       struct intel_ringbuffer *ringbuf)
2160 {
2161         struct drm_i915_gem_object *obj;
2162
2163         obj = NULL;
2164         if (!HAS_LLC(dev))
2165                 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
2166         if (obj == NULL)
2167                 obj = i915_gem_alloc_object(dev, ringbuf->size);
2168         if (obj == NULL)
2169                 return -ENOMEM;
2170
2171         /* mark ring buffers as read-only from GPU side by default */
2172         obj->gt_ro = 1;
2173
2174         ringbuf->obj = obj;
2175
2176         return 0;
2177 }
2178
2179 struct intel_ringbuffer *
2180 intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2181 {
2182         struct intel_ringbuffer *ring;
2183         int ret;
2184
2185         ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2186         if (ring == NULL) {
2187                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2188                                  engine->name);
2189                 return ERR_PTR(-ENOMEM);
2190         }
2191
2192         ring->engine = engine;
2193         list_add(&ring->link, &engine->buffers);
2194
2195         ring->size = size;
2196         /* Workaround an erratum on the i830 which causes a hang if
2197          * the TAIL pointer points to within the last 2 cachelines
2198          * of the buffer.
2199          */
2200         ring->effective_size = size;
2201         if (IS_I830(engine->dev) || IS_845G(engine->dev))
2202                 ring->effective_size -= 2 * CACHELINE_BYTES;
2203
2204         ring->last_retired_head = -1;
2205         intel_ring_update_space(ring);
2206
2207         ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2208         if (ret) {
2209                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
2210                                  engine->name, ret);
2211                 list_del(&ring->link);
2212                 kfree(ring);
2213                 return ERR_PTR(ret);
2214         }
2215
2216         return ring;
2217 }
2218
2219 void
2220 intel_ringbuffer_free(struct intel_ringbuffer *ring)
2221 {
2222         intel_destroy_ringbuffer_obj(ring);
2223         list_del(&ring->link);
2224         kfree(ring);
2225 }
2226
2227 static int intel_init_ring_buffer(struct drm_device *dev,
2228                                   struct intel_engine_cs *engine)
2229 {
2230         struct intel_ringbuffer *ringbuf;
2231         int ret;
2232
2233         WARN_ON(engine->buffer);
2234
2235         engine->dev = dev;
2236         INIT_LIST_HEAD(&engine->active_list);
2237         INIT_LIST_HEAD(&engine->request_list);
2238         INIT_LIST_HEAD(&engine->execlist_queue);
2239         INIT_LIST_HEAD(&engine->buffers);
2240         i915_gem_batch_pool_init(dev, &engine->batch_pool);
2241         memset(engine->semaphore.sync_seqno, 0,
2242                sizeof(engine->semaphore.sync_seqno));
2243
2244         init_waitqueue_head(&engine->irq_queue);
2245
2246         ringbuf = intel_engine_create_ringbuffer(engine, 32 * PAGE_SIZE);
2247         if (IS_ERR(ringbuf)) {
2248                 ret = PTR_ERR(ringbuf);
2249                 goto error;
2250         }
2251         engine->buffer = ringbuf;
2252
2253         if (I915_NEED_GFX_HWS(dev)) {
2254                 ret = init_status_page(engine);
2255                 if (ret)
2256                         goto error;
2257         } else {
2258                 WARN_ON(engine->id != RCS);
2259                 ret = init_phys_status_page(engine);
2260                 if (ret)
2261                         goto error;
2262         }
2263
2264         ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2265         if (ret) {
2266                 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2267                                 engine->name, ret);
2268                 intel_destroy_ringbuffer_obj(ringbuf);
2269                 goto error;
2270         }
2271
2272         ret = i915_cmd_parser_init_ring(engine);
2273         if (ret)
2274                 goto error;
2275
2276         return 0;
2277
2278 error:
2279         intel_cleanup_engine(engine);
2280         return ret;
2281 }
2282
2283 void intel_cleanup_engine(struct intel_engine_cs *engine)
2284 {
2285         struct drm_i915_private *dev_priv;
2286
2287         if (!intel_engine_initialized(engine))
2288                 return;
2289
2290         dev_priv = to_i915(engine->dev);
2291
2292         if (engine->buffer) {
2293                 intel_stop_engine(engine);
2294                 WARN_ON(!IS_GEN2(engine->dev) && (I915_READ_MODE(engine) & MODE_IDLE) == 0);
2295
2296                 intel_unpin_ringbuffer_obj(engine->buffer);
2297                 intel_ringbuffer_free(engine->buffer);
2298                 engine->buffer = NULL;
2299         }
2300
2301         if (engine->cleanup)
2302                 engine->cleanup(engine);
2303
2304         if (I915_NEED_GFX_HWS(engine->dev)) {
2305                 cleanup_status_page(engine);
2306         } else {
2307                 WARN_ON(engine->id != RCS);
2308                 cleanup_phys_status_page(engine);
2309         }
2310
2311         i915_cmd_parser_fini_ring(engine);
2312         i915_gem_batch_pool_fini(&engine->batch_pool);
2313         engine->dev = NULL;
2314 }
2315
2316 static int ring_wait_for_space(struct intel_engine_cs *engine, int n)
2317 {
2318         struct intel_ringbuffer *ringbuf = engine->buffer;
2319         struct drm_i915_gem_request *request;
2320         unsigned space;
2321         int ret;
2322
2323         if (intel_ring_space(ringbuf) >= n)
2324                 return 0;
2325
2326         /* The whole point of reserving space is to not wait! */
2327         WARN_ON(ringbuf->reserved_in_use);
2328
2329         list_for_each_entry(request, &engine->request_list, list) {
2330                 space = __intel_ring_space(request->postfix, ringbuf->tail,
2331                                            ringbuf->size);
2332                 if (space >= n)
2333                         break;
2334         }
2335
2336         if (WARN_ON(&request->list == &engine->request_list))
2337                 return -ENOSPC;
2338
2339         ret = i915_wait_request(request);
2340         if (ret)
2341                 return ret;
2342
2343         ringbuf->space = space;
2344         return 0;
2345 }
2346
2347 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
2348 {
2349         uint32_t __iomem *virt;
2350         int rem = ringbuf->size - ringbuf->tail;
2351
2352         virt = ringbuf->virtual_start + ringbuf->tail;
2353         rem /= 4;
2354         while (rem--)
2355                 iowrite32(MI_NOOP, virt++);
2356
2357         ringbuf->tail = 0;
2358         intel_ring_update_space(ringbuf);
2359 }
2360
2361 int intel_engine_idle(struct intel_engine_cs *engine)
2362 {
2363         struct drm_i915_gem_request *req;
2364
2365         /* Wait upon the last request to be completed */
2366         if (list_empty(&engine->request_list))
2367                 return 0;
2368
2369         req = list_entry(engine->request_list.prev,
2370                          struct drm_i915_gem_request,
2371                          list);
2372
2373         /* Make sure we do not trigger any retires */
2374         return __i915_wait_request(req,
2375                                    atomic_read(&to_i915(engine->dev)->gpu_error.reset_counter),
2376                                    to_i915(engine->dev)->mm.interruptible,
2377                                    NULL, NULL);
2378 }
2379
2380 int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
2381 {
2382         request->ringbuf = request->engine->buffer;
2383         return 0;
2384 }
2385
2386 int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2387 {
2388         /*
2389          * The first call merely notes the reserve request and is common for
2390          * all back ends. The subsequent localised _begin() call actually
2391          * ensures that the reservation is available. Without the begin, if
2392          * the request creator immediately submitted the request without
2393          * adding any commands to it then there might not actually be
2394          * sufficient room for the submission commands.
2395          */
2396         intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
2397
2398         return intel_ring_begin(request, 0);
2399 }
2400
2401 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2402 {
2403         WARN_ON(ringbuf->reserved_size);
2404         WARN_ON(ringbuf->reserved_in_use);
2405
2406         ringbuf->reserved_size = size;
2407 }
2408
2409 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2410 {
2411         WARN_ON(ringbuf->reserved_in_use);
2412
2413         ringbuf->reserved_size   = 0;
2414         ringbuf->reserved_in_use = false;
2415 }
2416
2417 void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2418 {
2419         WARN_ON(ringbuf->reserved_in_use);
2420
2421         ringbuf->reserved_in_use = true;
2422         ringbuf->reserved_tail   = ringbuf->tail;
2423 }
2424
2425 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2426 {
2427         WARN_ON(!ringbuf->reserved_in_use);
2428         if (ringbuf->tail > ringbuf->reserved_tail) {
2429                 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2430                      "request reserved size too small: %d vs %d!\n",
2431                      ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2432         } else {
2433                 /*
2434                  * The ring was wrapped while the reserved space was in use.
2435                  * That means that some unknown amount of the ring tail was
2436                  * no-op filled and skipped. Thus simply adding the ring size
2437                  * to the tail and doing the above space check will not work.
2438                  * Rather than attempt to track how much tail was skipped,
2439                  * it is much simpler to say that also skipping the sanity
2440                  * check every once in a while is not a big issue.
2441                  */
2442         }
2443
2444         ringbuf->reserved_size   = 0;
2445         ringbuf->reserved_in_use = false;
2446 }
2447
2448 static int __intel_ring_prepare(struct intel_engine_cs *engine, int bytes)
2449 {
2450         struct intel_ringbuffer *ringbuf = engine->buffer;
2451         int remain_usable = ringbuf->effective_size - ringbuf->tail;
2452         int remain_actual = ringbuf->size - ringbuf->tail;
2453         int ret, total_bytes, wait_bytes = 0;
2454         bool need_wrap = false;
2455
2456         if (ringbuf->reserved_in_use)
2457                 total_bytes = bytes;
2458         else
2459                 total_bytes = bytes + ringbuf->reserved_size;
2460
2461         if (unlikely(bytes > remain_usable)) {
2462                 /*
2463                  * Not enough space for the basic request. So need to flush
2464                  * out the remainder and then wait for base + reserved.
2465                  */
2466                 wait_bytes = remain_actual + total_bytes;
2467                 need_wrap = true;
2468         } else {
2469                 if (unlikely(total_bytes > remain_usable)) {
2470                         /*
2471                          * The base request will fit but the reserved space
2472                          * falls off the end. So only need to to wait for the
2473                          * reserved size after flushing out the remainder.
2474                          */
2475                         wait_bytes = remain_actual + ringbuf->reserved_size;
2476                         need_wrap = true;
2477                 } else if (total_bytes > ringbuf->space) {
2478                         /* No wrapping required, just waiting. */
2479                         wait_bytes = total_bytes;
2480                 }
2481         }
2482
2483         if (wait_bytes) {
2484                 ret = ring_wait_for_space(engine, wait_bytes);
2485                 if (unlikely(ret))
2486                         return ret;
2487
2488                 if (need_wrap)
2489                         __wrap_ring_buffer(ringbuf);
2490         }
2491
2492         return 0;
2493 }
2494
2495 int intel_ring_begin(struct drm_i915_gem_request *req,
2496                      int num_dwords)
2497 {
2498         struct intel_engine_cs *engine;
2499         struct drm_i915_private *dev_priv;
2500         int ret;
2501
2502         WARN_ON(req == NULL);
2503         engine = req->engine;
2504         dev_priv = req->i915;
2505
2506         ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2507                                    dev_priv->mm.interruptible);
2508         if (ret)
2509                 return ret;
2510
2511         ret = __intel_ring_prepare(engine, num_dwords * sizeof(uint32_t));
2512         if (ret)
2513                 return ret;
2514
2515         engine->buffer->space -= num_dwords * sizeof(uint32_t);
2516         return 0;
2517 }
2518
2519 /* Align the ring tail to a cacheline boundary */
2520 int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
2521 {
2522         struct intel_engine_cs *engine = req->engine;
2523         int num_dwords = (engine->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2524         int ret;
2525
2526         if (num_dwords == 0)
2527                 return 0;
2528
2529         num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
2530         ret = intel_ring_begin(req, num_dwords);
2531         if (ret)
2532                 return ret;
2533
2534         while (num_dwords--)
2535                 intel_ring_emit(engine, MI_NOOP);
2536
2537         intel_ring_advance(engine);
2538
2539         return 0;
2540 }
2541
2542 void intel_ring_init_seqno(struct intel_engine_cs *engine, u32 seqno)
2543 {
2544         struct drm_device *dev = engine->dev;
2545         struct drm_i915_private *dev_priv = dev->dev_private;
2546
2547         if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
2548                 I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
2549                 I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
2550                 if (HAS_VEBOX(dev))
2551                         I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
2552         }
2553
2554         engine->set_seqno(engine, seqno);
2555         engine->hangcheck.seqno = seqno;
2556 }
2557
2558 static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
2559                                      u32 value)
2560 {
2561         struct drm_i915_private *dev_priv = engine->dev->dev_private;
2562
2563        /* Every tail move must follow the sequence below */
2564
2565         /* Disable notification that the ring is IDLE. The GT
2566          * will then assume that it is busy and bring it out of rc6.
2567          */
2568         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2569                    _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2570
2571         /* Clear the context id. Here be magic! */
2572         I915_WRITE64(GEN6_BSD_RNCID, 0x0);
2573
2574         /* Wait for the ring not to be idle, i.e. for it to wake up. */
2575         if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
2576                       GEN6_BSD_SLEEP_INDICATOR) == 0,
2577                      50))
2578                 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2579
2580         /* Now that the ring is fully powered up, update the tail */
2581         I915_WRITE_TAIL(engine, value);
2582         POSTING_READ(RING_TAIL(engine->mmio_base));
2583
2584         /* Let the ring send IDLE messages to the GT again,
2585          * and so let it sleep to conserve power when idle.
2586          */
2587         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2588                    _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2589 }
2590
2591 static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2592                                u32 invalidate, u32 flush)
2593 {
2594         struct intel_engine_cs *engine = req->engine;
2595         uint32_t cmd;
2596         int ret;
2597
2598         ret = intel_ring_begin(req, 4);
2599         if (ret)
2600                 return ret;
2601
2602         cmd = MI_FLUSH_DW;
2603         if (INTEL_INFO(engine->dev)->gen >= 8)
2604                 cmd += 1;
2605
2606         /* We always require a command barrier so that subsequent
2607          * commands, such as breadcrumb interrupts, are strictly ordered
2608          * wrt the contents of the write cache being flushed to memory
2609          * (and thus being coherent from the CPU).
2610          */
2611         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2612
2613         /*
2614          * Bspec vol 1c.5 - video engine command streamer:
2615          * "If ENABLED, all TLBs will be invalidated once the flush
2616          * operation is complete. This bit is only valid when the
2617          * Post-Sync Operation field is a value of 1h or 3h."
2618          */
2619         if (invalidate & I915_GEM_GPU_DOMAINS)
2620                 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2621
2622         intel_ring_emit(engine, cmd);
2623         intel_ring_emit(engine,
2624                         I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2625         if (INTEL_INFO(engine->dev)->gen >= 8) {
2626                 intel_ring_emit(engine, 0); /* upper addr */
2627                 intel_ring_emit(engine, 0); /* value */
2628         } else  {
2629                 intel_ring_emit(engine, 0);
2630                 intel_ring_emit(engine, MI_NOOP);
2631         }
2632         intel_ring_advance(engine);
2633         return 0;
2634 }
2635
2636 static int
2637 gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2638                               u64 offset, u32 len,
2639                               unsigned dispatch_flags)
2640 {
2641         struct intel_engine_cs *engine = req->engine;
2642         bool ppgtt = USES_PPGTT(engine->dev) &&
2643                         !(dispatch_flags & I915_DISPATCH_SECURE);
2644         int ret;
2645
2646         ret = intel_ring_begin(req, 4);
2647         if (ret)
2648                 return ret;
2649
2650         /* FIXME(BDW): Address space and security selectors. */
2651         intel_ring_emit(engine, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2652                         (dispatch_flags & I915_DISPATCH_RS ?
2653                          MI_BATCH_RESOURCE_STREAMER : 0));
2654         intel_ring_emit(engine, lower_32_bits(offset));
2655         intel_ring_emit(engine, upper_32_bits(offset));
2656         intel_ring_emit(engine, MI_NOOP);
2657         intel_ring_advance(engine);
2658
2659         return 0;
2660 }
2661
2662 static int
2663 hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2664                              u64 offset, u32 len,
2665                              unsigned dispatch_flags)
2666 {
2667         struct intel_engine_cs *engine = req->engine;
2668         int ret;
2669
2670         ret = intel_ring_begin(req, 2);
2671         if (ret)
2672                 return ret;
2673
2674         intel_ring_emit(engine,
2675                         MI_BATCH_BUFFER_START |
2676                         (dispatch_flags & I915_DISPATCH_SECURE ?
2677                          0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
2678                         (dispatch_flags & I915_DISPATCH_RS ?
2679                          MI_BATCH_RESOURCE_STREAMER : 0));
2680         /* bit0-7 is the length on GEN6+ */
2681         intel_ring_emit(engine, offset);
2682         intel_ring_advance(engine);
2683
2684         return 0;
2685 }
2686
2687 static int
2688 gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2689                               u64 offset, u32 len,
2690                               unsigned dispatch_flags)
2691 {
2692         struct intel_engine_cs *engine = req->engine;
2693         int ret;
2694
2695         ret = intel_ring_begin(req, 2);
2696         if (ret)
2697                 return ret;
2698
2699         intel_ring_emit(engine,
2700                         MI_BATCH_BUFFER_START |
2701                         (dispatch_flags & I915_DISPATCH_SECURE ?
2702                          0 : MI_BATCH_NON_SECURE_I965));
2703         /* bit0-7 is the length on GEN6+ */
2704         intel_ring_emit(engine, offset);
2705         intel_ring_advance(engine);
2706
2707         return 0;
2708 }
2709
2710 /* Blitter support (SandyBridge+) */
2711
2712 static int gen6_ring_flush(struct drm_i915_gem_request *req,
2713                            u32 invalidate, u32 flush)
2714 {
2715         struct intel_engine_cs *engine = req->engine;
2716         struct drm_device *dev = engine->dev;
2717         uint32_t cmd;
2718         int ret;
2719
2720         ret = intel_ring_begin(req, 4);
2721         if (ret)
2722                 return ret;
2723
2724         cmd = MI_FLUSH_DW;
2725         if (INTEL_INFO(dev)->gen >= 8)
2726                 cmd += 1;
2727
2728         /* We always require a command barrier so that subsequent
2729          * commands, such as breadcrumb interrupts, are strictly ordered
2730          * wrt the contents of the write cache being flushed to memory
2731          * (and thus being coherent from the CPU).
2732          */
2733         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2734
2735         /*
2736          * Bspec vol 1c.3 - blitter engine command streamer:
2737          * "If ENABLED, all TLBs will be invalidated once the flush
2738          * operation is complete. This bit is only valid when the
2739          * Post-Sync Operation field is a value of 1h or 3h."
2740          */
2741         if (invalidate & I915_GEM_DOMAIN_RENDER)
2742                 cmd |= MI_INVALIDATE_TLB;
2743         intel_ring_emit(engine, cmd);
2744         intel_ring_emit(engine,
2745                         I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2746         if (INTEL_INFO(dev)->gen >= 8) {
2747                 intel_ring_emit(engine, 0); /* upper addr */
2748                 intel_ring_emit(engine, 0); /* value */
2749         } else  {
2750                 intel_ring_emit(engine, 0);
2751                 intel_ring_emit(engine, MI_NOOP);
2752         }
2753         intel_ring_advance(engine);
2754
2755         return 0;
2756 }
2757
2758 int intel_init_render_ring_buffer(struct drm_device *dev)
2759 {
2760         struct drm_i915_private *dev_priv = dev->dev_private;
2761         struct intel_engine_cs *engine = &dev_priv->engine[RCS];
2762         struct drm_i915_gem_object *obj;
2763         int ret;
2764
2765         engine->name = "render ring";
2766         engine->id = RCS;
2767         engine->exec_id = I915_EXEC_RENDER;
2768         engine->mmio_base = RENDER_RING_BASE;
2769
2770         if (INTEL_INFO(dev)->gen >= 8) {
2771                 if (i915_semaphore_is_enabled(dev)) {
2772                         obj = i915_gem_alloc_object(dev, 4096);
2773                         if (obj == NULL) {
2774                                 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2775                                 i915.semaphores = 0;
2776                         } else {
2777                                 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2778                                 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2779                                 if (ret != 0) {
2780                                         drm_gem_object_unreference(&obj->base);
2781                                         DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2782                                         i915.semaphores = 0;
2783                                 } else
2784                                         dev_priv->semaphore_obj = obj;
2785                         }
2786                 }
2787
2788                 engine->init_context = intel_rcs_ctx_init;
2789                 engine->add_request = gen6_add_request;
2790                 engine->flush = gen8_render_ring_flush;
2791                 engine->irq_get = gen8_ring_get_irq;
2792                 engine->irq_put = gen8_ring_put_irq;
2793                 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2794                 engine->get_seqno = gen6_ring_get_seqno;
2795                 engine->set_seqno = ring_set_seqno;
2796                 if (i915_semaphore_is_enabled(dev)) {
2797                         WARN_ON(!dev_priv->semaphore_obj);
2798                         engine->semaphore.sync_to = gen8_ring_sync;
2799                         engine->semaphore.signal = gen8_rcs_signal;
2800                         GEN8_RING_SEMAPHORE_INIT(engine);
2801                 }
2802         } else if (INTEL_INFO(dev)->gen >= 6) {
2803                 engine->init_context = intel_rcs_ctx_init;
2804                 engine->add_request = gen6_add_request;
2805                 engine->flush = gen7_render_ring_flush;
2806                 if (INTEL_INFO(dev)->gen == 6)
2807                         engine->flush = gen6_render_ring_flush;
2808                 engine->irq_get = gen6_ring_get_irq;
2809                 engine->irq_put = gen6_ring_put_irq;
2810                 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2811                 engine->get_seqno = gen6_ring_get_seqno;
2812                 engine->set_seqno = ring_set_seqno;
2813                 if (i915_semaphore_is_enabled(dev)) {
2814                         engine->semaphore.sync_to = gen6_ring_sync;
2815                         engine->semaphore.signal = gen6_signal;
2816                         /*
2817                          * The current semaphore is only applied on pre-gen8
2818                          * platform.  And there is no VCS2 ring on the pre-gen8
2819                          * platform. So the semaphore between RCS and VCS2 is
2820                          * initialized as INVALID.  Gen8 will initialize the
2821                          * sema between VCS2 and RCS later.
2822                          */
2823                         engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2824                         engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2825                         engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2826                         engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2827                         engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2828                         engine->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2829                         engine->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2830                         engine->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2831                         engine->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2832                         engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2833                 }
2834         } else if (IS_GEN5(dev)) {
2835                 engine->add_request = pc_render_add_request;
2836                 engine->flush = gen4_render_ring_flush;
2837                 engine->get_seqno = pc_render_get_seqno;
2838                 engine->set_seqno = pc_render_set_seqno;
2839                 engine->irq_get = gen5_ring_get_irq;
2840                 engine->irq_put = gen5_ring_put_irq;
2841                 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2842                                         GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
2843         } else {
2844                 engine->add_request = i9xx_add_request;
2845                 if (INTEL_INFO(dev)->gen < 4)
2846                         engine->flush = gen2_render_ring_flush;
2847                 else
2848                         engine->flush = gen4_render_ring_flush;
2849                 engine->get_seqno = ring_get_seqno;
2850                 engine->set_seqno = ring_set_seqno;
2851                 if (IS_GEN2(dev)) {
2852                         engine->irq_get = i8xx_ring_get_irq;
2853                         engine->irq_put = i8xx_ring_put_irq;
2854                 } else {
2855                         engine->irq_get = i9xx_ring_get_irq;
2856                         engine->irq_put = i9xx_ring_put_irq;
2857                 }
2858                 engine->irq_enable_mask = I915_USER_INTERRUPT;
2859         }
2860         engine->write_tail = ring_write_tail;
2861
2862         if (IS_HASWELL(dev))
2863                 engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
2864         else if (IS_GEN8(dev))
2865                 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
2866         else if (INTEL_INFO(dev)->gen >= 6)
2867                 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2868         else if (INTEL_INFO(dev)->gen >= 4)
2869                 engine->dispatch_execbuffer = i965_dispatch_execbuffer;
2870         else if (IS_I830(dev) || IS_845G(dev))
2871                 engine->dispatch_execbuffer = i830_dispatch_execbuffer;
2872         else
2873                 engine->dispatch_execbuffer = i915_dispatch_execbuffer;
2874         engine->init_hw = init_render_ring;
2875         engine->cleanup = render_ring_cleanup;
2876
2877         /* Workaround batchbuffer to combat CS tlb bug. */
2878         if (HAS_BROKEN_CS_TLB(dev)) {
2879                 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
2880                 if (obj == NULL) {
2881                         DRM_ERROR("Failed to allocate batch bo\n");
2882                         return -ENOMEM;
2883                 }
2884
2885                 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
2886                 if (ret != 0) {
2887                         drm_gem_object_unreference(&obj->base);
2888                         DRM_ERROR("Failed to ping batch bo\n");
2889                         return ret;
2890                 }
2891
2892                 engine->scratch.obj = obj;
2893                 engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2894         }
2895
2896         ret = intel_init_ring_buffer(dev, engine);
2897         if (ret)
2898                 return ret;
2899
2900         if (INTEL_INFO(dev)->gen >= 5) {
2901                 ret = intel_init_pipe_control(engine);
2902                 if (ret)
2903                         return ret;
2904         }
2905
2906         return 0;
2907 }
2908
2909 int intel_init_bsd_ring_buffer(struct drm_device *dev)
2910 {
2911         struct drm_i915_private *dev_priv = dev->dev_private;
2912         struct intel_engine_cs *engine = &dev_priv->engine[VCS];
2913
2914         engine->name = "bsd ring";
2915         engine->id = VCS;
2916         engine->exec_id = I915_EXEC_BSD;
2917
2918         engine->write_tail = ring_write_tail;
2919         if (INTEL_INFO(dev)->gen >= 6) {
2920                 engine->mmio_base = GEN6_BSD_RING_BASE;
2921                 /* gen6 bsd needs a special wa for tail updates */
2922                 if (IS_GEN6(dev))
2923                         engine->write_tail = gen6_bsd_ring_write_tail;
2924                 engine->flush = gen6_bsd_ring_flush;
2925                 engine->add_request = gen6_add_request;
2926                 engine->get_seqno = gen6_ring_get_seqno;
2927                 engine->set_seqno = ring_set_seqno;
2928                 if (INTEL_INFO(dev)->gen >= 8) {
2929                         engine->irq_enable_mask =
2930                                 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2931                         engine->irq_get = gen8_ring_get_irq;
2932                         engine->irq_put = gen8_ring_put_irq;
2933                         engine->dispatch_execbuffer =
2934                                 gen8_ring_dispatch_execbuffer;
2935                         if (i915_semaphore_is_enabled(dev)) {
2936                                 engine->semaphore.sync_to = gen8_ring_sync;
2937                                 engine->semaphore.signal = gen8_xcs_signal;
2938                                 GEN8_RING_SEMAPHORE_INIT(engine);
2939                         }
2940                 } else {
2941                         engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2942                         engine->irq_get = gen6_ring_get_irq;
2943                         engine->irq_put = gen6_ring_put_irq;
2944                         engine->dispatch_execbuffer =
2945                                 gen6_ring_dispatch_execbuffer;
2946                         if (i915_semaphore_is_enabled(dev)) {
2947                                 engine->semaphore.sync_to = gen6_ring_sync;
2948                                 engine->semaphore.signal = gen6_signal;
2949                                 engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2950                                 engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2951                                 engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2952                                 engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2953                                 engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2954                                 engine->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2955                                 engine->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2956                                 engine->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2957                                 engine->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2958                                 engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2959                         }
2960                 }
2961         } else {
2962                 engine->mmio_base = BSD_RING_BASE;
2963                 engine->flush = bsd_ring_flush;
2964                 engine->add_request = i9xx_add_request;
2965                 engine->get_seqno = ring_get_seqno;
2966                 engine->set_seqno = ring_set_seqno;
2967                 if (IS_GEN5(dev)) {
2968                         engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
2969                         engine->irq_get = gen5_ring_get_irq;
2970                         engine->irq_put = gen5_ring_put_irq;
2971                 } else {
2972                         engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2973                         engine->irq_get = i9xx_ring_get_irq;
2974                         engine->irq_put = i9xx_ring_put_irq;
2975                 }
2976                 engine->dispatch_execbuffer = i965_dispatch_execbuffer;
2977         }
2978         engine->init_hw = init_ring_common;
2979
2980         return intel_init_ring_buffer(dev, engine);
2981 }
2982
2983 /**
2984  * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
2985  */
2986 int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2987 {
2988         struct drm_i915_private *dev_priv = dev->dev_private;
2989         struct intel_engine_cs *engine = &dev_priv->engine[VCS2];
2990
2991         engine->name = "bsd2 ring";
2992         engine->id = VCS2;
2993         engine->exec_id = I915_EXEC_BSD;
2994
2995         engine->write_tail = ring_write_tail;
2996         engine->mmio_base = GEN8_BSD2_RING_BASE;
2997         engine->flush = gen6_bsd_ring_flush;
2998         engine->add_request = gen6_add_request;
2999         engine->get_seqno = gen6_ring_get_seqno;
3000         engine->set_seqno = ring_set_seqno;
3001         engine->irq_enable_mask =
3002                         GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
3003         engine->irq_get = gen8_ring_get_irq;
3004         engine->irq_put = gen8_ring_put_irq;
3005         engine->dispatch_execbuffer =
3006                         gen8_ring_dispatch_execbuffer;
3007         if (i915_semaphore_is_enabled(dev)) {
3008                 engine->semaphore.sync_to = gen8_ring_sync;
3009                 engine->semaphore.signal = gen8_xcs_signal;
3010                 GEN8_RING_SEMAPHORE_INIT(engine);
3011         }
3012         engine->init_hw = init_ring_common;
3013
3014         return intel_init_ring_buffer(dev, engine);
3015 }
3016
3017 int intel_init_blt_ring_buffer(struct drm_device *dev)
3018 {
3019         struct drm_i915_private *dev_priv = dev->dev_private;
3020         struct intel_engine_cs *engine = &dev_priv->engine[BCS];
3021
3022         engine->name = "blitter ring";
3023         engine->id = BCS;
3024         engine->exec_id = I915_EXEC_BLT;
3025
3026         engine->mmio_base = BLT_RING_BASE;
3027         engine->write_tail = ring_write_tail;
3028         engine->flush = gen6_ring_flush;
3029         engine->add_request = gen6_add_request;
3030         engine->get_seqno = gen6_ring_get_seqno;
3031         engine->set_seqno = ring_set_seqno;
3032         if (INTEL_INFO(dev)->gen >= 8) {
3033                 engine->irq_enable_mask =
3034                         GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
3035                 engine->irq_get = gen8_ring_get_irq;
3036                 engine->irq_put = gen8_ring_put_irq;
3037                 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3038                 if (i915_semaphore_is_enabled(dev)) {
3039                         engine->semaphore.sync_to = gen8_ring_sync;
3040                         engine->semaphore.signal = gen8_xcs_signal;
3041                         GEN8_RING_SEMAPHORE_INIT(engine);
3042                 }
3043         } else {
3044                 engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
3045                 engine->irq_get = gen6_ring_get_irq;
3046                 engine->irq_put = gen6_ring_put_irq;
3047                 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
3048                 if (i915_semaphore_is_enabled(dev)) {
3049                         engine->semaphore.signal = gen6_signal;
3050                         engine->semaphore.sync_to = gen6_ring_sync;
3051                         /*
3052                          * The current semaphore is only applied on pre-gen8
3053                          * platform.  And there is no VCS2 ring on the pre-gen8
3054                          * platform. So the semaphore between BCS and VCS2 is
3055                          * initialized as INVALID.  Gen8 will initialize the
3056                          * sema between BCS and VCS2 later.
3057                          */
3058                         engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
3059                         engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
3060                         engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
3061                         engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
3062                         engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3063                         engine->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
3064                         engine->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
3065                         engine->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
3066                         engine->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
3067                         engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3068                 }
3069         }
3070         engine->init_hw = init_ring_common;
3071
3072         return intel_init_ring_buffer(dev, engine);
3073 }
3074
3075 int intel_init_vebox_ring_buffer(struct drm_device *dev)
3076 {
3077         struct drm_i915_private *dev_priv = dev->dev_private;
3078         struct intel_engine_cs *engine = &dev_priv->engine[VECS];
3079
3080         engine->name = "video enhancement ring";
3081         engine->id = VECS;
3082         engine->exec_id = I915_EXEC_VEBOX;
3083
3084         engine->mmio_base = VEBOX_RING_BASE;
3085         engine->write_tail = ring_write_tail;
3086         engine->flush = gen6_ring_flush;
3087         engine->add_request = gen6_add_request;
3088         engine->get_seqno = gen6_ring_get_seqno;
3089         engine->set_seqno = ring_set_seqno;
3090
3091         if (INTEL_INFO(dev)->gen >= 8) {
3092                 engine->irq_enable_mask =
3093                         GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
3094                 engine->irq_get = gen8_ring_get_irq;
3095                 engine->irq_put = gen8_ring_put_irq;
3096                 engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3097                 if (i915_semaphore_is_enabled(dev)) {
3098                         engine->semaphore.sync_to = gen8_ring_sync;
3099                         engine->semaphore.signal = gen8_xcs_signal;
3100                         GEN8_RING_SEMAPHORE_INIT(engine);
3101                 }
3102         } else {
3103                 engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
3104                 engine->irq_get = hsw_vebox_get_irq;
3105                 engine->irq_put = hsw_vebox_put_irq;
3106                 engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
3107                 if (i915_semaphore_is_enabled(dev)) {
3108                         engine->semaphore.sync_to = gen6_ring_sync;
3109                         engine->semaphore.signal = gen6_signal;
3110                         engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
3111                         engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
3112                         engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
3113                         engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
3114                         engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
3115                         engine->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
3116                         engine->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
3117                         engine->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
3118                         engine->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
3119                         engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
3120                 }
3121         }
3122         engine->init_hw = init_ring_common;
3123
3124         return intel_init_ring_buffer(dev, engine);
3125 }
3126
3127 int
3128 intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
3129 {
3130         struct intel_engine_cs *engine = req->engine;
3131         int ret;
3132
3133         if (!engine->gpu_caches_dirty)
3134                 return 0;
3135
3136         ret = engine->flush(req, 0, I915_GEM_GPU_DOMAINS);
3137         if (ret)
3138                 return ret;
3139
3140         trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
3141
3142         engine->gpu_caches_dirty = false;
3143         return 0;
3144 }
3145
3146 int
3147 intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
3148 {
3149         struct intel_engine_cs *engine = req->engine;
3150         uint32_t flush_domains;
3151         int ret;
3152
3153         flush_domains = 0;
3154         if (engine->gpu_caches_dirty)
3155                 flush_domains = I915_GEM_GPU_DOMAINS;
3156
3157         ret = engine->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3158         if (ret)
3159                 return ret;
3160
3161         trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3162
3163         engine->gpu_caches_dirty = false;
3164         return 0;
3165 }
3166
3167 void
3168 intel_stop_engine(struct intel_engine_cs *engine)
3169 {
3170         int ret;
3171
3172         if (!intel_engine_initialized(engine))
3173                 return;
3174
3175         ret = intel_engine_idle(engine);
3176         if (ret && !i915_reset_in_progress(&to_i915(engine->dev)->gpu_error))
3177                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3178                           engine->name, ret);
3179
3180         stop_ring(engine);
3181 }