/* drivers/gpu/drm/i915/selftests/intel_hangcheck.c */
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

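/*
 * hang_init() prepares the scratch state shared by the tests below: a page
 * that acts as a software "hardware status page" (hws) into which the
 * hanging batches report their seqno, and a second page into which the
 * batch commands themselves are written.
 */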
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

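/*
 * Each request writes its breadcrumb into its own dword of the hws page,
 * indexed by the request's fence context, so spinners on different
 * timelines do not clobber each other's reports.
 */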
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

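/*
 * emit_recurse_batch() fills h->obj with a small batch that stores the
 * request's seqno into the hws page and then issues an
 * MI_BATCH_BUFFER_START pointing back at the start of the batch itself,
 * i.e. an infinite loop. The loop is only broken when a test overwrites
 * the first dword of the batch with MI_BATCH_BUFFER_END (see hang_fini()),
 * which is why the trailing terminator is marked "not reached".
 */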
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	i915_gem_chipset_flush(h->i915);

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

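/*
 * If the previous hanging batch is still executing, we cannot rewrite its
 * backing storage without first waiting for it to complete. Instead, swap
 * in a freshly allocated batch object so a new spinner can be emitted
 * while its predecessor is still spinning on the GPU.
 */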
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

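/* Read back the seqno most recently reported to the hws by this request. */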
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

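/*
 * hang_fini() breaks any batch that may still be spinning by overwriting
 * its first dword with MI_BATCH_BUFFER_END, then releases the objects and
 * waits for the GPU to idle before returning to the caller.
 */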
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(h->i915);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

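/*
 * Serialise against any other reset in flight: claim I915_RESET_BACKOFF
 * plus every per-engine reset bit, waiting for whoever currently holds
 * them to finish before the test proceeds.
 */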
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

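/*
 * Worker for igt_reset_active_engines: keep the given engine busy by
 * submitting a steady stream of empty requests, alternating between two
 * contexts, until the parent asks the kthread to stop.
 */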
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

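/*
 * Pretend that hangcheck has declared the engine hung: mark the request's
 * engine as stalled at its current seqno and set I915_RESET_HANDOFF so
 * that a waiter takes over and performs the actual reset.
 */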
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

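/*
 * Poll the hws until the spinner reports its seqno, i.e. until the hanging
 * batch has actually started executing on the GPU: a short busy-wait
 * first, then a sleeping wait of up to a second.
 */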
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				struct drm_printer p = drm_info_printer(i915->drm.dev);

				pr_err("Failed to start request %x, at %x\n",
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(rq->engine, &p);

				i915_gem_request_put(rq);
				i915_gem_request_put(prev);

				i915_reset(i915, 0);
				i915_gem_set_wedged(i915);

				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		i915_gem_chipset_flush(i915);

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		struct drm_printer p = drm_info_printer(i915->drm.dev);

		pr_err("Failed to start request %x, at %x\n",
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p);

		i915_reset(i915, 0);
		i915_gem_set_wedged(i915);

		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

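/*
 * Entry point called by the i915 live selftest framework. The subtests run
 * in the order listed, and the whole suite is skipped if the device does
 * not support GPU reset at all.
 */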
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};
	int err;

	if (!intel_has_gpu_reset(i915))
		return 0;

	intel_runtime_pm_get(i915);

	err = i915_subtests(tests, i915);

	intel_runtime_pm_put(i915);

	return err;
}