/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "../i915_selftest.h"

#include "mock_context.h"
#include "mock_drm.h"
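
/*
 * Fixture shared by the tests below: "hws" acts as a scratch status page
 * into which each hanging batch writes its request's seqno, and "obj"
 * holds the batch itself. Both stay pinned and mapped (h->seqno, h->batch)
 * for the lifetime of the fixture.
 */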
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}
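
/* Each fence context owns one u32 seqno slot in the hws page. */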
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}
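
/*
 * Emit a batch that stores the request's seqno into the hws page and then
 * jumps back to its own start with MI_BATCH_BUFFER_START, spinning forever.
 * The "hang" is broken only by an engine/GPU reset or by overwriting the
 * loop with MI_BATCH_BUFFER_END (see hang_fini()).
 */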
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}
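
/*
 * Build a request carrying the recursing batch. If the current batch object
 * is still active (an earlier hang has not yet been cleaned up), allocate a
 * fresh object so each request spins in its own page.
 */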
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}
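
/* Read back the seqno that the spinning batch wrote for this request. */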
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}
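
/*
 * Break any batch still spinning by overwriting its loop with
 * MI_BATCH_BUFFER_END, then release the fixture and wait for the GPU
 * to idle.
 */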
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	wmb();

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
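
/*
 * Claim exclusive reset rights: take the global I915_RESET_BACKOFF bit and
 * every per-engine reset bit, sleeping until each becomes free.
 */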
static void global_reset_lock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags))
		wait_event(i915->gpu_error.reset_queue,
			   !test_bit(I915_RESET_BACKOFF,
				     &i915->gpu_error.flags));

	for_each_engine(engine, i915, id) {
		while (test_and_set_bit(I915_RESET_ENGINE + id,
					&i915->gpu_error.flags))
			wait_on_bit(&i915->gpu_error.flags,
				    I915_RESET_ENGINE + id,
				    TASK_UNINTERRUPTIBLE);
	}
}

static void global_reset_unlock(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);

	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	global_reset_lock(i915);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915, I915_RESET_QUIET);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can issue a per-engine reset, and that it is
	 * recorded as an engine reset rather than a full GPU reset.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine, I915_RESET_QUIET);
		if (err) {
			pr_err("i915_reset_engine failed\n");
			break;
		}

		if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
			break;
		}

		if (i915_reset_engine_count(&i915->gpu_error, engine) ==
		    reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
			break;
		}

		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}
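
/*
 * kthread body used as background load: keep the given engine busy by
 * alternating requests between two contexts until told to stop.
 */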
static int active_engine(void *data)
{
	struct intel_engine_cs *engine = data;
	struct drm_i915_gem_request *rq[2] = {};
	struct i915_gem_context *ctx[2];
	struct drm_file *file;
	unsigned long count = 0;
	int err = 0;

	file = mock_file(engine->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[0] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[0])) {
		err = PTR_ERR(ctx[0]);
		goto err_file;
	}

	mutex_lock(&engine->i915->drm.struct_mutex);
	ctx[1] = live_context(engine->i915, file);
	mutex_unlock(&engine->i915->drm.struct_mutex);
	if (IS_ERR(ctx[1])) {
		err = PTR_ERR(ctx[1]);
		i915_gem_context_put(ctx[0]);
		goto err_file;
	}

	while (!kthread_should_stop()) {
		unsigned int idx = count++ & 1;
		struct drm_i915_gem_request *old = rq[idx];
		struct drm_i915_gem_request *new;

		mutex_lock(&engine->i915->drm.struct_mutex);
		new = i915_gem_request_alloc(engine, ctx[idx]);
		if (IS_ERR(new)) {
			mutex_unlock(&engine->i915->drm.struct_mutex);
			err = PTR_ERR(new);
			break;
		}

		rq[idx] = i915_gem_request_get(new);
		i915_add_request(new);
		mutex_unlock(&engine->i915->drm.struct_mutex);

		if (old) {
			i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT);
			i915_gem_request_put(old);
		}
	}

	for (count = 0; count < ARRAY_SIZE(rq); count++)
		i915_gem_request_put(rq[count]);

err_file:
	mock_file_free(engine->i915, file);
	return err;
}

static int igt_reset_active_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine, *active;
	enum intel_engine_id id, tmp;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		struct task_struct *threads[I915_NUM_ENGINES];
		unsigned long resets[I915_NUM_ENGINES];
		unsigned long global = i915_reset_count(&i915->gpu_error);
		IGT_TIMEOUT(end_time);

		memset(threads, 0, sizeof(threads));
		for_each_engine(active, i915, tmp) {
			struct task_struct *tsk;

			if (active == engine)
				continue;

			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
							      active);

			tsk = kthread_run(active_engine, active,
					  "igt/%s", active->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp] = tsk;
			get_task_struct(tsk);
		}

		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);
		do {
			err = i915_reset_engine(engine, I915_RESET_QUIET);
			if (err) {
				pr_err("i915_reset_engine(%s) failed, err=%d\n",
				       engine->name, err);
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

unwind:
		for_each_engine(active, i915, tmp) {
			int ret;

			if (!threads[tmp])
				continue;

			ret = kthread_stop(threads[tmp]);
			if (ret) {
				pr_err("kthread for active engine %s failed, err=%d\n",
				       active->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp]);

			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
								   active)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       active->name,
				       i915_reset_engine_count(&i915->gpu_error,
							       active) - resets[tmp]);
				err = -EIO;
			}
		}

		if (global != i915_reset_count(&i915->gpu_error)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(&i915->gpu_error) - global);
			err = -EIO;
		}

		if (err)
			break;

		cond_resched();
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}
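
/*
 * Pretend hangcheck has declared the engine hung: mark it as stalled at the
 * current seqno, then raise I915_RESET_HANDOFF and kick the waiters so that
 * one of them carries out the reset.
 */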
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}
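
/*
 * Returns true once the spinning batch has written its seqno to the hws
 * page, i.e. the hang is actually executing on the GPU: first a short
 * busy-poll, then a sleeping wait of up to a second.
 */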
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	/* Check that we detect a stuck waiter and issue a reset */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	global_reset_lock(i915);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				pr_err("Failed to start request %x\n",
				       prev->fence.seqno);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915, I915_RESET_QUIET);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;
		wmb();

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	global_reset_unlock(i915);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_handle_error(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that i915_handle_error() recovers a hung engine and
	 * marks the guilty request with -EIO.
	 */

	if (!intel_has_reset_engine(i915))
		return 0;

	if (!intel_engine_can_store_dword(i915->engine[RCS]))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto err_unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto err_request;
	}

	mutex_unlock(&i915->drm.struct_mutex);

	/* Temporarily disable error capture */
	error = xchg(&i915->gpu_error.first_error, (void *)-1);

	engine->hangcheck.stalled = true;
	engine->hangcheck.seqno = intel_engine_get_seqno(engine);

	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);

	xchg(&i915->gpu_error.first_error, error);

	mutex_lock(&i915->drm.struct_mutex);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_gem_request_put(rq);
err_fini:
	hang_fini(&h);
err_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_global_reset),
		SUBTEST(igt_reset_engine),
		SUBTEST(igt_reset_active_engines),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
		SUBTEST(igt_handle_error),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}