/* drivers/gpu/drm/i915/selftests/intel_hangcheck.c */
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"

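/*
 * Shared state for the hangcheck selftests: obj holds a self-referencing
 * batch that spins until its first dword is overwritten, and hws is a page
 * of per-context breadcrumbs into which the batch writes its seqno.
 */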
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

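/*
 * Allocate and map the breadcrumb page (CPU cacheable) and the batch
 * object, recording the CPU pointers so we can inspect and rewrite them.
 */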
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

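/* GPU address of the breadcrumb slot assigned to the request's context. */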
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

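/*
 * Build the hanging batch: store the request's seqno into its breadcrumb
 * slot, then MI_BATCH_BUFFER_START back to the start of the batch so that
 * it spins forever. The batch only terminates once its first dword has
 * been rewritten to MI_BATCH_BUFFER_END (see hang_fini()).
 */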
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

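/*
 * Construct a request carrying the hanging batch. If the current batch
 * object is still busy (a previous hang may still be spinning), swap in a
 * fresh one so that we do not scribble over an in-flight batch.
 */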
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

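/* Read back the breadcrumb last written by the batch for this context. */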
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

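/*
 * Terminate any spinning batch by rewriting its first instruction to
 * MI_BATCH_BUFFER_END, then release the objects and wait for the GPU
 * to idle.
 */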
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	wmb();

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

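/*
 * Serialise against the driver's reset paths: claim I915_RESET_BACKOFF
 * and every per-engine reset bit, waiting for any reset already in
 * progress to complete. global_reset_unlock() releases them again.
 */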
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

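/*
 * Kthread used by igt_reset_active_engines(): keep the engine busy by
 * continually submitting empty requests, alternating between two
 * contexts, until asked to stop.
 */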
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

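/*
 * Pretend hangcheck has declared the request's engine hung: mark it as
 * stalled at its current seqno and set I915_RESET_HANDOFF so that a
 * waiter performs the reset. Returns the reset count sampled beforehand.
 */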
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

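/*
 * Wait until the spinning batch has written its breadcrumb, i.e. until
 * the request has actually started executing on the GPU.
 */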
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				pr_err("Failed to start request %x\n",
				       prev->fence.seqno);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		wmb();

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

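/*
 * Entry point for the live hangcheck/reset selftests; skipped entirely
 * on platforms without GPU reset support.
 */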
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_global_reset),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}