Commit | Line | Data |
---|---|---|
496b575e CW |
1 | /* |
2 | * Copyright © 2016 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | */ | |
24 | ||
79f0f472 CW |
25 | #include <linux/kthread.h> |
26 | ||
10be98a7 | 27 | #include "gem/i915_gem_context.h" |
e6ba7648 CW |
28 | |
29 | #include "intel_gt.h" | |
30 | #include "intel_engine_heartbeat.h" | |
79ffac85 CW |
31 | #include "intel_engine_pm.h" |
32 | ||
112ed2d3 CW |
33 | #include "i915_selftest.h" |
34 | #include "selftests/i915_random.h" | |
35 | #include "selftests/igt_flush_test.h" | |
36 | #include "selftests/igt_reset.h" | |
f6470c9b | 37 | #include "selftests/igt_atomic.h" |
496b575e | 38 | |
112ed2d3 | 39 | #include "selftests/mock_drm.h" |
79f0f472 | 40 | |
10be98a7 CW |
41 | #include "gem/selftests/mock_context.h" |
42 | #include "gem/selftests/igt_gem_utils.h" | |
43 | ||
935dff1a CW |
44 | #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ |
45 | ||
/*
 * State for constructing a "hanging" (spinning) batch buffer: the batch
 * object itself plus a status page (hws) the batch writes its fence seqno
 * into, so the CPU can observe when the batch has actually started.
 */
struct hang {
	struct intel_gt *gt;
	struct drm_i915_gem_object *hws; /* status page, written by the batch */
	struct drm_i915_gem_object *obj; /* the spinning batch buffer */
	struct i915_gem_context *ctx;    /* kernel context the requests run in */
	u32 *seqno; /* CPU mapping of hws, indexed by fence.context */
	u32 *batch; /* CPU mapping of obj */
};
54 | ||
/*
 * Allocate and map the objects needed to emit hanging batches on @gt.
 * On success h->seqno (status page, pre-filled with 0xff) and h->batch
 * are CPU-visible mappings; on failure everything acquired so far is
 * released via the goto-unwind chain and a negative errno is returned.
 */
static int hang_init(struct hang *h, struct intel_gt *gt)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->gt = gt;

	h->ctx = kernel_context(gt->i915);
	if (IS_ERR(h->ctx))
		return PTR_ERR(h->ctx);

	/* The kernel context must not be bannable, else a hang may ban it */
	GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));

	h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(h->hws)) {
		err = PTR_ERR(h->hws);
		goto err_ctx;
	}

	h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	/* 0xff != any seqno we will write, so "not yet started" is visible */
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					i915_coherent_map_type(gt->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
err_ctx:
	kernel_context_close(h->ctx);
	return err;
}
109 | ||
/*
 * GPU address within the status page at which @rq's batch stores its
 * seqno; one u32 slot per fence context, wrapped to stay within a page.
 */
static u64 hws_address(const struct i915_vma *hws,
		       const struct i915_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}
115 | ||
/*
 * Track @vma as busy on @rq: serialise against existing work on the
 * object (as a write if EXEC_OBJECT_WRITE) and mark the vma active.
 * The vma lock is held across both steps.
 */
static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj,
					flags & EXEC_OBJECT_WRITE);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}
131 | ||
/*
 * Build and submit-prepare a request whose batch stores rq->fence.seqno
 * into the status page and then loops back on itself (MI_BATCH_BUFFER_START
 * to its own address) — i.e. a GPU hang — until the CPU overwrites the
 * batch with MI_BATCH_BUFFER_END. A fresh batch object replaces h->obj on
 * every call so that a previous (possibly still spinning) batch is not
 * reused. Returns the request, or an ERR_PTR; the vm reference taken at
 * the top is dropped on every path.
 */
static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
	struct intel_gt *gt = h->gt;
	struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
	struct drm_i915_gem_object *obj;
	struct i915_request *rq = NULL;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	void *vaddr;
	u32 *batch;
	int err;

	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		i915_vm_put(vm);
		return ERR_CAST(obj);
	}

	vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
	if (IS_ERR(vaddr)) {
		i915_gem_object_put(obj);
		i915_vm_put(vm);
		return ERR_CAST(vaddr);
	}

	/* Swap in the new batch object, releasing the old one */
	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	h->obj = obj;
	h->batch = vaddr;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_vm_put(vm);
		return ERR_CAST(vma);
	}

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws)) {
		i915_vm_put(vm);
		return ERR_CAST(hws);
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err) {
		i915_vm_put(vm);
		return ERR_PTR(err);
	}

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	rq = igt_request_alloc(h->ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin_hws;
	}

	err = move_to_active(vma, rq, 0);
	if (err)
		goto cancel_rq;

	err = move_to_active(hws, rq, 0);
	if (err)
		goto cancel_rq;

	/*
	 * Emit the per-generation batch: store seqno to the status page,
	 * then an arbitration point, 1KiB of noops, another arbitration
	 * point, and finally a branch back to the start of the batch.
	 */
	batch = h->batch;
	if (INTEL_GEN(gt->i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(gt->i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(gt->i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_ARB_CHECK;

		memset(batch, 0, 1024);
		batch += 1024 / sizeof(*batch);

		*batch++ = MI_ARB_CHECK;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
	intel_gt_chipset_flush(engine->gt);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto cancel_rq;
	}

	flags = 0;
	if (INTEL_GEN(gt->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

cancel_rq:
	if (err) {
		/* The request is already allocated: mark it skipped and add */
		i915_request_skip(rq, err);
		i915_request_add(rq);
	}
unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	i915_vm_put(vm);
	return err ? ERR_PTR(err) : rq;
}
281 | ||
/* Read the seqno the batch (if started) wrote into @rq's status-page slot */
static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}
286 | ||
/*
 * Terminate any still-spinning batch (by writing MI_BATCH_BUFFER_END and
 * flushing), release all objects acquired in hang_init(), and flush
 * outstanding test work.
 */
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(h->gt);

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	kernel_context_close(h->ctx);

	igt_flush_test(h->gt->i915);
}
302 | ||
/*
 * Poll the status page until the spinner reports it has started executing:
 * a quick 10us busy-wait first, falling back to a sleeping wait of up to
 * 1s. Returns true if the request is running on the GPU.
 */
static bool wait_until_running(struct hang *h, struct i915_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}
312 | ||
/*
 * Stop the engine heartbeat so it cannot interfere with a deliberate hang;
 * the previous interval is saved into *saved for engine_heartbeat_enable().
 * Also takes an engine-pm wakeref to keep the engine awake meanwhile.
 */
static void engine_heartbeat_disable(struct intel_engine_cs *engine,
				     unsigned long *saved)
{
	*saved = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	intel_engine_pm_get(engine);
	intel_engine_park_heartbeat(engine);
}
322 | ||
/* Undo engine_heartbeat_disable(): drop the pm wakeref, restore interval */
static void engine_heartbeat_enable(struct intel_engine_cs *engine,
				    unsigned long saved)
{
	intel_engine_pm_put(engine);

	engine->props.heartbeat_interval_ms = saved;
}
330 | ||
/*
 * Sanity check: submit the "hanging" batch on every engine, but defuse it
 * immediately (write MI_BATCH_BUFFER_END before adding the request) and
 * verify the request then completes normally. If the wait stalls for
 * 100ms the device is wedged via intel_wedge_on_timeout.
 */
static int igt_hang_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	err = hang_init(&h, gt);
	if (err)
		return err;

	for_each_engine(engine, gt, id) {
		struct intel_wedge_me w;
		long timeout;

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = hang_create_request(&h, engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_request_get(rq);

		/* Defuse the spinner before it is ever submitted */
		*h.batch = MI_BATCH_BUFFER_END;
		intel_gt_chipset_flush(engine->gt);

		i915_request_add(rq);

		timeout = 0;
		intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
			timeout = i915_request_wait(rq, 0,
						    MAX_SCHEDULE_TIMEOUT);
		if (intel_gt_is_wedged(gt))
			timeout = -EIO;

		i915_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
	return err;
}
389 | ||
/* Wait up to IGT_IDLE_TIMEOUT ms for @engine to become idle */
static bool wait_for_idle(struct intel_engine_cs *engine)
{
	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
}
394 | ||
/*
 * Repeatedly perform full GPU resets while background (empty) requests are
 * queued on every engine, checking that each reset is recorded in the
 * global reset count and the device survives unwedged.
 */
static int igt_reset_nop(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	unsigned int reset_count, count;
	enum intel_engine_id id;
	IGT_TIMEOUT(end_time);
	int err = 0;

	/* Check that we can reset during non-user portions of requests */

	reset_count = i915_reset_count(global);
	count = 0;
	do {
		for_each_engine(engine, gt, id) {
			struct intel_context *ce;
			int i;

			ce = intel_context_create(engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				break;
			}

			/* Queue a batch of trivial requests to reset over */
			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = intel_context_create_request(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}

			intel_context_put(ce);
		}

		igt_global_reset_lock(gt);
		intel_gt_reset(gt, ALL_ENGINES, NULL);
		igt_global_reset_unlock(gt);

		if (intel_gt_is_wedged(gt)) {
			err = -EIO;
			break;
		}

		if (i915_reset_count(global) != reset_count + ++count) {
			pr_err("Full GPU reset not recorded!\n");
			err = -EINVAL;
			break;
		}

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	} while (time_before(jiffies, end_time));
	pr_info("%s: %d resets\n", __func__, count);

	if (igt_flush_test(gt->i915))
		err = -EIO;
	return err;
}
460 | ||
/*
 * Per-engine variant of igt_reset_nop: with the heartbeat disabled,
 * repeatedly engine-reset each idle engine while trivial requests are
 * queued, checking that only the per-engine reset count (and never the
 * full-GPU count) advances.
 */
static int igt_reset_nop_engine(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Check that we can engine-reset during non-user portions */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		unsigned int reset_count, reset_engine_count, count;
		struct intel_context *ce;
		unsigned long heartbeat;
		IGT_TIMEOUT(end_time);
		int err;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		reset_count = i915_reset_count(global);
		reset_engine_count = i915_reset_engine_count(global, engine);
		count = 0;

		engine_heartbeat_disable(engine, &heartbeat);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			int i;

			if (!wait_for_idle(engine)) {
				pr_err("%s failed to idle before reset\n",
				       engine->name);
				err = -EIO;
				break;
			}

			for (i = 0; i < 16; i++) {
				struct i915_request *rq;

				rq = intel_context_create_request(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_add(rq);
			}
			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			/* An engine reset must not bump the global count */
			if (i915_reset_count(global) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(global, engine) !=
			    reset_engine_count + ++count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		engine_heartbeat_enable(engine, heartbeat);

		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);

		intel_context_put(ce);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
545 | ||
/*
 * Exercise per-engine reset on each engine, either while idle (@active ==
 * false, the reset should be a no-op) or while a hanging batch is running
 * (@active == true). Verifies only the per-engine reset counter advances
 * and that the engine recovers after each reset.
 */
static int __igt_reset_engine(struct intel_gt *gt, bool active)
{
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err = 0;

	/* Check that we can issue an engine reset on an idle engine (no-op) */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (active) {
		err = hang_init(&h, gt);
		if (err)
			return err;
	}

	for_each_engine(engine, gt, id) {
		unsigned int reset_count, reset_engine_count;
		unsigned long heartbeat;
		IGT_TIMEOUT(end_time);

		if (active && !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("%s failed to idle before reset\n",
			       engine->name);
			err = -EIO;
			break;
		}

		reset_count = i915_reset_count(global);
		reset_engine_count = i915_reset_engine_count(global, engine);

		engine_heartbeat_disable(engine, &heartbeat);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			if (active) {
				struct i915_request *rq;

				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);

				/* Reset must land while the spinner runs */
				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine failed\n");
				break;
			}

			if (i915_reset_count(global) != reset_count) {
				pr_err("Full GPU reset recorded! (engine reset expected)\n");
				err = -EINVAL;
				break;
			}

			if (i915_reset_engine_count(global, engine) !=
			    ++reset_engine_count) {
				pr_err("%s engine reset not recorded!\n",
				       engine->name);
				err = -EINVAL;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		engine_heartbeat_enable(engine, heartbeat);

		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	if (active)
		hang_fini(&h);

	return err;
}
653 | ||
/* Engine reset while idle: the reset should be a harmless no-op */
static int igt_reset_idle_engine(void *arg)
{
	return __igt_reset_engine(arg, false);
}
658 | ||
/* Engine reset while a hanging batch is executing on the engine */
static int igt_reset_active_engine(void *arg)
{
	return __igt_reset_engine(arg, true);
}
663 | ||
/* Per-kthread state for keeping a background engine busy during resets */
struct active_engine {
	struct task_struct *task; /* the active_engine() kthread */
	struct intel_engine_cs *engine;
	unsigned long resets;     /* engine reset count sampled at start */
	unsigned int flags;       /* TEST_* behaviour flags below */
};

#define TEST_ACTIVE	BIT(0) /* run a hanging batch on the reset engine */
#define TEST_OTHERS	BIT(1) /* keep the other engines busy meanwhile */
#define TEST_SELF	BIT(2) /* also keep the reset engine itself busy */
#define TEST_PRIORITY	BIT(3) /* submit with randomised priorities */
/*
 * Wait (up to 5s) for @rq to complete then drop our reference. A timeout
 * is treated as a device failure: the GT is wedged and -EIO returned.
 * NULL is accepted and ignored.
 */
static int active_request_put(struct i915_request *rq)
{
	int err = 0;

	if (!rq)
		return 0;

	if (i915_request_wait(rq, 0, 5 * HZ) < 0) {
		GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n",
			  rq->engine->name,
			  rq->fence.context,
			  rq->fence.seqno);
		GEM_TRACE_DUMP();

		intel_gt_set_wedged(rq->engine->gt);
		err = -EIO;
	}

	i915_request_put(rq);

	return err;
}
698 | ||
79f0f472 CW |
699 | static int active_engine(void *data) |
700 | { | |
a90507d6 CW |
701 | I915_RND_STATE(prng); |
702 | struct active_engine *arg = data; | |
703 | struct intel_engine_cs *engine = arg->engine; | |
704 | struct i915_request *rq[8] = {}; | |
e6ba7648 CW |
705 | struct intel_context *ce[ARRAY_SIZE(rq)]; |
706 | unsigned long count; | |
79f0f472 CW |
707 | int err = 0; |
708 | ||
e6ba7648 CW |
709 | for (count = 0; count < ARRAY_SIZE(ce); count++) { |
710 | ce[count] = intel_context_create(engine); | |
711 | if (IS_ERR(ce[count])) { | |
712 | err = PTR_ERR(ce[count]); | |
a90507d6 | 713 | while (--count) |
e6ba7648 CW |
714 | intel_context_put(ce[count]); |
715 | return err; | |
a90507d6 | 716 | } |
79f0f472 CW |
717 | } |
718 | ||
e6ba7648 | 719 | count = 0; |
79f0f472 | 720 | while (!kthread_should_stop()) { |
a90507d6 | 721 | unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); |
e61e0f51 CW |
722 | struct i915_request *old = rq[idx]; |
723 | struct i915_request *new; | |
79f0f472 | 724 | |
e6ba7648 | 725 | new = intel_context_create_request(ce[idx]); |
79f0f472 | 726 | if (IS_ERR(new)) { |
79f0f472 CW |
727 | err = PTR_ERR(new); |
728 | break; | |
729 | } | |
730 | ||
e61e0f51 CW |
731 | rq[idx] = i915_request_get(new); |
732 | i915_request_add(new); | |
79f0f472 | 733 | |
e6ba7648 CW |
734 | if (engine->schedule && arg->flags & TEST_PRIORITY) { |
735 | struct i915_sched_attr attr = { | |
736 | .priority = | |
737 | i915_prandom_u32_max_state(512, &prng), | |
738 | }; | |
739 | engine->schedule(rq[idx], &attr); | |
740 | } | |
741 | ||
39d3cc03 CW |
742 | err = active_request_put(old); |
743 | if (err) | |
744 | break; | |
0ade4390 CW |
745 | |
746 | cond_resched(); | |
79f0f472 CW |
747 | } |
748 | ||
39d3cc03 CW |
749 | for (count = 0; count < ARRAY_SIZE(rq); count++) { |
750 | int err__ = active_request_put(rq[count]); | |
751 | ||
752 | /* Keep the first error */ | |
753 | if (!err) | |
754 | err = err__; | |
e6ba7648 CW |
755 | |
756 | intel_context_put(ce[count]); | |
39d3cc03 | 757 | } |
79f0f472 | 758 | |
79f0f472 CW |
759 | return err; |
760 | } | |
761 | ||
/*
 * Check that resetting one engine does not disturb the others: spawn
 * active_engine() kthreads on the other (or all, with TEST_SELF) engines,
 * then repeatedly reset @engine — optionally while it runs a hanging
 * batch (TEST_ACTIVE). Afterwards verify the reset counters: only the
 * target engine's count may have advanced, never an innocent engine's
 * nor the global (full GPU) count.
 */
static int __igt_reset_engines(struct intel_gt *gt,
			       const char *test_name,
			       unsigned int flags)
{
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine, *other;
	enum intel_engine_id id, tmp;
	struct hang h;
	int err = 0;

	/* Check that issuing a reset on one engine does not interfere
	 * with any other engine.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (flags & TEST_ACTIVE) {
		err = hang_init(&h, gt);
		if (err)
			return err;

		if (flags & TEST_PRIORITY)
			h.ctx->sched.priority = 1024;
	}

	for_each_engine(engine, gt, id) {
		struct active_engine threads[I915_NUM_ENGINES] = {};
		unsigned long device = i915_reset_count(global);
		unsigned long count = 0, reported;
		unsigned long heartbeat;
		IGT_TIMEOUT(end_time);

		if (flags & TEST_ACTIVE &&
		    !intel_engine_can_store_dword(engine))
			continue;

		if (!wait_for_idle(engine)) {
			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
			       engine->name, test_name);
			err = -EIO;
			break;
		}

		memset(threads, 0, sizeof(threads));
		for_each_engine(other, gt, tmp) {
			struct task_struct *tsk;

			/* Baseline reset count for the innocence check */
			threads[tmp].resets =
				i915_reset_engine_count(global, other);

			if (!(flags & TEST_OTHERS))
				continue;

			if (other == engine && !(flags & TEST_SELF))
				continue;

			threads[tmp].engine = other;
			threads[tmp].flags = flags;

			tsk = kthread_run(active_engine, &threads[tmp],
					  "igt/%s", other->name);
			if (IS_ERR(tsk)) {
				err = PTR_ERR(tsk);
				goto unwind;
			}

			threads[tmp].task = tsk;
			get_task_struct(tsk);
		}

		yield(); /* start all threads before we begin */

		engine_heartbeat_disable(engine, &heartbeat);
		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		do {
			struct i915_request *rq = NULL;

			if (flags & TEST_ACTIVE) {
				rq = hang_create_request(&h, engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					break;
				}

				i915_request_get(rq);
				i915_request_add(rq);

				if (!wait_until_running(&h, rq)) {
					struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

					pr_err("%s: Failed to start request %llx, at %x\n",
					       __func__, rq->fence.seqno, hws_seqno(&h, rq));
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);

					i915_request_put(rq);
					err = -EIO;
					break;
				}
			}

			err = intel_engine_reset(engine, NULL);
			if (err) {
				pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
				       engine->name, test_name, err);
				break;
			}

			count++;

			if (rq) {
				/* The hung request must complete after reset */
				if (i915_request_wait(rq, 0, HZ / 5) < 0) {
					struct drm_printer p =
						drm_info_printer(gt->i915->drm.dev);

					pr_err("i915_reset_engine(%s:%s):"
					       " failed to complete request after reset\n",
					       engine->name, test_name);
					intel_engine_dump(engine, &p,
							  "%s\n", engine->name);
					i915_request_put(rq);

					GEM_TRACE_DUMP();
					intel_gt_set_wedged(gt);
					err = -EIO;
					break;
				}

				i915_request_put(rq);
			}

			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
				struct drm_printer p =
					drm_info_printer(gt->i915->drm.dev);

				pr_err("i915_reset_engine(%s:%s):"
				       " failed to idle after reset\n",
				       engine->name, test_name);
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -EIO;
				break;
			}
		} while (time_before(jiffies, end_time));
		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
		engine_heartbeat_enable(engine, heartbeat);

		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
			engine->name, test_name, count);

		reported = i915_reset_engine_count(global, engine);
		reported -= threads[engine->id].resets;
		if (reported != count) {
			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
			       engine->name, test_name, count, reported);
			if (!err)
				err = -EINVAL;
		}

unwind:
		for_each_engine(other, gt, tmp) {
			int ret;

			if (!threads[tmp].task)
				continue;

			ret = kthread_stop(threads[tmp].task);
			if (ret) {
				pr_err("kthread for other engine %s failed, err=%d\n",
				       other->name, ret);
				if (!err)
					err = ret;
			}
			put_task_struct(threads[tmp].task);

			/* Engines of the same class may be legitimately reset together */
			if (other->uabi_class != engine->uabi_class &&
			    threads[tmp].resets !=
			    i915_reset_engine_count(global, other)) {
				pr_err("Innocent engine %s was reset (count=%ld)\n",
				       other->name,
				       i915_reset_engine_count(global, other) -
				       threads[tmp].resets);
				if (!err)
					err = -EINVAL;
			}
		}

		if (device != i915_reset_count(global)) {
			pr_err("Global reset (count=%ld)!\n",
			       i915_reset_count(global) - device);
			if (!err)
				err = -EINVAL;
		}

		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	if (flags & TEST_ACTIVE)
		hang_fini(&h);

	return err;
}
974 | ||
a90507d6 | 975 | static int igt_reset_engines(void *arg) |
3fb04cb0 | 976 | { |
a90507d6 CW |
977 | static const struct { |
978 | const char *name; | |
979 | unsigned int flags; | |
980 | } phases[] = { | |
981 | { "idle", 0 }, | |
982 | { "active", TEST_ACTIVE }, | |
983 | { "others-idle", TEST_OTHERS }, | |
984 | { "others-active", TEST_OTHERS | TEST_ACTIVE }, | |
985 | { | |
986 | "others-priority", | |
987 | TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | |
988 | }, | |
989 | { | |
990 | "self-priority", | |
991 | TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, | |
992 | }, | |
993 | { } | |
994 | }; | |
cb823ed9 | 995 | struct intel_gt *gt = arg; |
a90507d6 CW |
996 | typeof(*phases) *p; |
997 | int err; | |
3fb04cb0 | 998 | |
a90507d6 CW |
999 | for (p = phases; p->name; p++) { |
1000 | if (p->flags & TEST_PRIORITY) { | |
cb823ed9 | 1001 | if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) |
a90507d6 CW |
1002 | continue; |
1003 | } | |
1004 | ||
1005 | err = __igt_reset_engines(arg, p->name, p->flags); | |
1006 | if (err) | |
1007 | return err; | |
1008 | } | |
1009 | ||
1010 | return 0; | |
3fb04cb0 CW |
1011 | } |
1012 | ||
cb823ed9 | 1013 | static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) |
496b575e | 1014 | { |
cb823ed9 | 1015 | u32 count = i915_reset_count(>->i915->gpu_error); |
496b575e | 1016 | |
cb823ed9 | 1017 | intel_gt_reset(gt, mask, NULL); |
496b575e | 1018 | |
eb8d0f5a | 1019 | return count; |
496b575e CW |
1020 | } |
1021 | ||
eb5f43d4 | 1022 | static int igt_reset_wait(void *arg) |
496b575e | 1023 | { |
cb823ed9 CW |
1024 | struct intel_gt *gt = arg; |
1025 | struct i915_gpu_error *global = >->i915->gpu_error; | |
1f9f6353 | 1026 | struct intel_engine_cs *engine = gt->engine[RCS0]; |
e61e0f51 | 1027 | struct i915_request *rq; |
496b575e CW |
1028 | unsigned int reset_count; |
1029 | struct hang h; | |
1030 | long timeout; | |
1031 | int err; | |
1032 | ||
cb823ed9 | 1033 | if (!engine || !intel_engine_can_store_dword(engine)) |
f2f5c061 CW |
1034 | return 0; |
1035 | ||
496b575e CW |
1036 | /* Check that we detect a stuck waiter and issue a reset */ |
1037 | ||
cb823ed9 | 1038 | igt_global_reset_lock(gt); |
496b575e | 1039 | |
cb823ed9 | 1040 | err = hang_init(&h, gt); |
496b575e CW |
1041 | if (err) |
1042 | goto unlock; | |
1043 | ||
cb823ed9 | 1044 | rq = hang_create_request(&h, engine); |
496b575e CW |
1045 | if (IS_ERR(rq)) { |
1046 | err = PTR_ERR(rq); | |
1047 | goto fini; | |
1048 | } | |
1049 | ||
e61e0f51 | 1050 | i915_request_get(rq); |
697b9a87 | 1051 | i915_request_add(rq); |
496b575e | 1052 | |
29991d53 | 1053 | if (!wait_until_running(&h, rq)) { |
cb823ed9 | 1054 | struct drm_printer p = drm_info_printer(gt->i915->drm.dev); |
95a19ab4 | 1055 | |
dd847a70 | 1056 | pr_err("%s: Failed to start request %llx, at %x\n", |
3fb04cb0 | 1057 | __func__, rq->fence.seqno, hws_seqno(&h, rq)); |
0db18b17 | 1058 | intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); |
87dc03ad | 1059 | |
cb823ed9 | 1060 | intel_gt_set_wedged(gt); |
87dc03ad | 1061 | |
496b575e CW |
1062 | err = -EIO; |
1063 | goto out_rq; | |
1064 | } | |
1065 | ||
cb823ed9 | 1066 | reset_count = fake_hangcheck(gt, ALL_ENGINES); |
496b575e | 1067 | |
2f530945 | 1068 | timeout = i915_request_wait(rq, 0, 10); |
496b575e | 1069 | if (timeout < 0) { |
e532be89 | 1070 | pr_err("i915_request_wait failed on a stuck request: err=%ld\n", |
496b575e CW |
1071 | timeout); |
1072 | err = timeout; | |
1073 | goto out_rq; | |
1074 | } | |
496b575e | 1075 | |
cb823ed9 | 1076 | if (i915_reset_count(global) == reset_count) { |
496b575e CW |
1077 | pr_err("No GPU reset recorded!\n"); |
1078 | err = -EINVAL; | |
1079 | goto out_rq; | |
1080 | } | |
1081 | ||
1082 | out_rq: | |
e61e0f51 | 1083 | i915_request_put(rq); |
496b575e CW |
1084 | fini: |
1085 | hang_fini(&h); | |
1086 | unlock: | |
cb823ed9 | 1087 | igt_global_reset_unlock(gt); |
496b575e | 1088 | |
cb823ed9 | 1089 | if (intel_gt_is_wedged(gt)) |
496b575e CW |
1090 | return -EIO; |
1091 | ||
1092 | return err; | |
1093 | } | |
1094 | ||
eb5f43d4 CW |
/*
 * Handshake state shared with the evict_vma()/evict_fence() kthreads:
 * the kthread completes @completion just before it blocks trying to
 * evict or fence @vma, so the parent knows the worker has started.
 */
struct evict_vma {
	struct completion completion;
	struct i915_vma *vma;
};
1099 | ||
1100 | static int evict_vma(void *data) | |
1101 | { | |
1102 | struct evict_vma *arg = data; | |
1103 | struct i915_address_space *vm = arg->vma->vm; | |
eb5f43d4 CW |
1104 | struct drm_mm_node evict = arg->vma->node; |
1105 | int err; | |
1106 | ||
1107 | complete(&arg->completion); | |
1108 | ||
2850748e | 1109 | mutex_lock(&vm->mutex); |
eb5f43d4 | 1110 | err = i915_gem_evict_for_node(vm, &evict, 0); |
2850748e | 1111 | mutex_unlock(&vm->mutex); |
eb5f43d4 CW |
1112 | |
1113 | return err; | |
1114 | } | |
1115 | ||
6dc17d69 CW |
1116 | static int evict_fence(void *data) |
1117 | { | |
1118 | struct evict_vma *arg = data; | |
6dc17d69 CW |
1119 | int err; |
1120 | ||
1121 | complete(&arg->completion); | |
1122 | ||
6dc17d69 CW |
1123 | /* Mark the fence register as dirty to force the mmio update. */ |
1124 | err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); | |
1125 | if (err) { | |
1126 | pr_err("Invalid Y-tiling settings; err:%d\n", err); | |
2850748e | 1127 | return err; |
6dc17d69 CW |
1128 | } |
1129 | ||
e2ccc50a CW |
1130 | err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); |
1131 | if (err) { | |
1132 | pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); | |
2850748e | 1133 | return err; |
e2ccc50a CW |
1134 | } |
1135 | ||
6dc17d69 | 1136 | err = i915_vma_pin_fence(arg->vma); |
e2ccc50a | 1137 | i915_vma_unpin(arg->vma); |
6dc17d69 CW |
1138 | if (err) { |
1139 | pr_err("Unable to pin Y-tiled fence; err:%d\n", err); | |
2850748e | 1140 | return err; |
6dc17d69 CW |
1141 | } |
1142 | ||
1143 | i915_vma_unpin_fence(arg->vma); | |
1144 | ||
2850748e | 1145 | return 0; |
6dc17d69 CW |
1146 | } |
1147 | ||
/*
 * Core of the reset-vs-evict tests: make a vma busy on a hanging request,
 * spawn a kthread (@fn: evict_vma or evict_fence) that blocks trying to
 * take the vma away, then reset the engine and check the kthread was
 * released. @flags selects write/fence semantics for the busy vma.
 */
static int __igt_reset_evict_vma(struct intel_gt *gt,
				 struct i915_address_space *vm,
				 int (*fn)(void *),
				 unsigned int flags)
{
	struct intel_engine_cs *engine = gt->engine[RCS0];
	struct drm_i915_gem_object *obj;
	struct task_struct *tsk = NULL;
	struct i915_request *rq;
	struct evict_vma arg;
	struct hang h;
	unsigned int pin_flags;
	int err;

	/* Fence variant is meaningless without any fence registers. */
	if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
		return 0;

	if (!engine || !intel_engine_can_store_dword(engine))
		return 0;

	/* Check that we can recover an unbind stuck on a hanging request */

	err = hang_init(&h, gt);
	if (err)
		return err;

	obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto fini;
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
		err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512);
		if (err) {
			pr_err("Invalid X-tiling settings; err:%d\n", err);
			goto out_obj;
		}
	}

	arg.vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(arg.vma)) {
		err = PTR_ERR(arg.vma);
		goto out_obj;
	}

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_obj;
	}

	pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;

	if (flags & EXEC_OBJECT_NEEDS_FENCE)
		pin_flags |= PIN_MAPPABLE;

	err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
	if (err) {
		/* Must still flush the constructed request before bailing. */
		i915_request_add(rq);
		goto out_obj;
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE) {
		err = i915_vma_pin_fence(arg.vma);
		if (err) {
			pr_err("Unable to pin X-tiled fence; err:%d\n", err);
			i915_vma_unpin(arg.vma);
			i915_request_add(rq);
			goto out_obj;
		}
	}

	/* Mark the vma busy on the (soon to be hanging) request. */
	i915_vma_lock(arg.vma);
	err = i915_request_await_object(rq, arg.vma->obj,
					flags & EXEC_OBJECT_WRITE);
	if (err == 0)
		err = i915_vma_move_to_active(arg.vma, rq, flags);
	i915_vma_unlock(arg.vma);

	if (flags & EXEC_OBJECT_NEEDS_FENCE)
		i915_vma_unpin_fence(arg.vma);
	i915_vma_unpin(arg.vma);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto out_rq;

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		intel_gt_set_wedged(gt);
		goto out_reset;
	}

	init_completion(&arg.completion);

	/* Worker that should block on the busy vma until the reset. */
	tsk = kthread_run(fn, &arg, "igt/evict_vma");
	if (IS_ERR(tsk)) {
		err = PTR_ERR(tsk);
		tsk = NULL;
		goto out_reset;
	}
	get_task_struct(tsk);

	wait_for_completion(&arg.completion);

	/*
	 * The kthread must end up waiting on the request (visible as a
	 * signaling callback being installed); if not, it never blocked.
	 */
	if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

		pr_err("igt/evict_vma kthread did not wait\n");
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		intel_gt_set_wedged(gt);
		goto out_reset;
	}

out_reset:
	igt_global_reset_lock(gt);
	fake_hangcheck(gt, rq->engine->mask);
	igt_global_reset_unlock(gt);

	if (tsk) {
		struct intel_wedge_me w;

		/* The reset, even indirectly, should take less than 10ms. */
		intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
			err = kthread_stop(tsk);

		put_task_struct(tsk);
	}

out_rq:
	i915_request_put(rq);
out_obj:
	i915_gem_object_put(obj);
fini:
	hang_fini(&h);
	if (intel_gt_is_wedged(gt))
		return -EIO;

	return err;
}
1296 | ||
/* Recover an eviction from the global GTT stuck behind a hanging request. */
static int igt_reset_evict_ggtt(void *arg)
{
	struct intel_gt *gt = arg;

	return __igt_reset_evict_vma(gt, &gt->ggtt->vm,
				     evict_vma, EXEC_OBJECT_WRITE);
}
1304 | ||
1305 | static int igt_reset_evict_ppgtt(void *arg) | |
1306 | { | |
cb823ed9 | 1307 | struct intel_gt *gt = arg; |
e6ba7648 | 1308 | struct i915_ppgtt *ppgtt; |
eb5f43d4 CW |
1309 | int err; |
1310 | ||
e6ba7648 CW |
1311 | /* aliasing == global gtt locking, covered above */ |
1312 | if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) | |
1313 | return 0; | |
ab84a110 | 1314 | |
2c86e55d | 1315 | ppgtt = i915_ppgtt_create(gt); |
e6ba7648 CW |
1316 | if (IS_ERR(ppgtt)) |
1317 | return PTR_ERR(ppgtt); | |
eb5f43d4 | 1318 | |
e6ba7648 CW |
1319 | err = __igt_reset_evict_vma(gt, &ppgtt->vm, |
1320 | evict_vma, EXEC_OBJECT_WRITE); | |
1321 | i915_vm_put(&ppgtt->vm); | |
eb5f43d4 | 1322 | |
eb5f43d4 CW |
1323 | return err; |
1324 | } | |
1325 | ||
6dc17d69 CW |
1326 | static int igt_reset_evict_fence(void *arg) |
1327 | { | |
cb823ed9 | 1328 | struct intel_gt *gt = arg; |
6dc17d69 | 1329 | |
cb823ed9 | 1330 | return __igt_reset_evict_vma(gt, >->ggtt->vm, |
6dc17d69 CW |
1331 | evict_fence, EXEC_OBJECT_NEEDS_FENCE); |
1332 | } | |
1333 | ||
cb823ed9 | 1334 | static int wait_for_others(struct intel_gt *gt, |
02866679 CW |
1335 | struct intel_engine_cs *exclude) |
1336 | { | |
1337 | struct intel_engine_cs *engine; | |
1338 | enum intel_engine_id id; | |
1339 | ||
5d904e3c | 1340 | for_each_engine(engine, gt, id) { |
02866679 CW |
1341 | if (engine == exclude) |
1342 | continue; | |
1343 | ||
935dff1a | 1344 | if (!wait_for_idle(engine)) |
02866679 CW |
1345 | return -EIO; |
1346 | } | |
1347 | ||
1348 | return 0; | |
1349 | } | |
1350 | ||
496b575e CW |
/*
 * On each engine, keep a hanging request at the head of the queue with an
 * innocent request queued behind it, reset the engine, and check the guilty
 * request is marked -EIO while the queued one survives untouched and is
 * replayed (becoming the next hang).
 */
static int igt_reset_queue(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	igt_global_reset_lock(gt);

	err = hang_init(&h, gt);
	if (err)
		goto unlock;

	for_each_engine(engine, gt, id) {
		struct i915_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Seed the queue with the first hanging request. */
		prev = hang_create_request(&h, engine);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_request_get(prev);
		i915_request_add(prev);

		count = 0;
		do {
			struct i915_request *rq;
			unsigned int reset_count;

			/* Queue an innocent request behind the hang. */
			rq = hang_create_request(&h, engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_request_get(rq);
			i915_request_add(rq);

			/*
			 * XXX We don't handle resetting the kernel context
			 * very well. If we trigger a device reset twice in
			 * quick succession while the kernel context is
			 * executing, we may end up skipping the breadcrumb.
			 * This is really only a problem for the selftest as
			 * normally there is a large interlude between resets
			 * (hangcheck), or we focus on resetting just one
			 * engine and so avoid repeatedly resetting innocents.
			 */
			err = wait_for_others(gt, engine);
			if (err) {
				pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
				       __func__, engine->name);
				i915_request_put(rq);
				i915_request_put(prev);

				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				goto fini;
			}

			if (!wait_until_running(&h, prev)) {
				struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

				pr_err("%s(%s): Failed to start request %llx, at %x\n",
				       __func__, engine->name,
				       prev->fence.seqno, hws_seqno(&h, prev));
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				i915_request_put(rq);
				i915_request_put(prev);

				intel_gt_set_wedged(gt);

				err = -EIO;
				goto fini;
			}

			/* Reset only this engine underneath the hang. */
			reset_count = fake_hangcheck(gt, BIT(id));

			/* The hanging request must be flagged guilty... */
			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			/* ...while the queued request stays unblemished. */
			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(global) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_request_put(rq);
				i915_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			/* The replayed request becomes the next hang. */
			i915_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		/* Terminate the spinner so the final request can retire. */
		*h.batch = MI_BATCH_BUFFER_END;
		intel_gt_chipset_flush(engine->gt);

		i915_request_put(prev);

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

fini:
	hang_fini(&h);
unlock:
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		return -EIO;

	return err;
}
1493 | ||
/*
 * Exercise the intel_gt_handle_error() entry point on a hanging request
 * (with error-state capture temporarily suppressed) and check the guilty
 * request is identified via fence.error == -EIO.
 */
static int igt_handle_error(void *arg)
{
	struct intel_gt *gt = arg;
	struct i915_gpu_error *global = &gt->i915->gpu_error;
	struct intel_engine_cs *engine = gt->engine[RCS0];
	struct hang h;
	struct i915_request *rq;
	struct i915_gpu_state *error;
	int err;

	/* Check that we can issue a global GPU and engine reset */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (!engine || !intel_engine_can_store_dword(engine))
		return 0;

	err = hang_init(&h, gt);
	if (err)
		return err;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (!wait_until_running(&h, rq)) {
		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);

		pr_err("%s: Failed to start request %llx, at %x\n",
		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);

		intel_gt_set_wedged(gt);

		err = -EIO;
		goto err_request;
	}

	/* Temporarily disable error capture */
	error = xchg(&global->first_error, (void *)-1);

	intel_gt_handle_error(gt, engine->mask, 0, NULL);

	/* Restore whatever error state was there before. */
	xchg(&global->first_error, error);

	if (rq->fence.error != -EIO) {
		pr_err("Guilty request not identified!\n");
		err = -EINVAL;
		goto err_request;
	}

err_request:
	i915_request_put(rq);
err_fini:
	hang_fini(&h);
	return err;
}
1557 | ||
921f3a60 | 1558 | static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, |
f6470c9b | 1559 | const struct igt_atomic_section *p, |
921f3a60 CW |
1560 | const char *mode) |
1561 | { | |
1562 | struct tasklet_struct * const t = &engine->execlists.tasklet; | |
1563 | int err; | |
1564 | ||
1565 | GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", | |
1566 | engine->name, mode, p->name); | |
1567 | ||
93100fde | 1568 | tasklet_disable(t); |
921f3a60 CW |
1569 | p->critical_section_begin(); |
1570 | ||
cb823ed9 | 1571 | err = intel_engine_reset(engine, NULL); |
921f3a60 CW |
1572 | |
1573 | p->critical_section_end(); | |
1574 | tasklet_enable(t); | |
1575 | ||
1576 | if (err) | |
1577 | pr_err("i915_reset_engine(%s:%s) failed under %s\n", | |
1578 | engine->name, mode, p->name); | |
1579 | ||
1580 | return err; | |
1581 | } | |
1582 | ||
/*
 * Reset @engine from atomic context @p twice: once while idle, then again
 * while it is running a hanging request; finally wait (bounded by a wedge
 * timeout) for the reset request to complete.
 */
static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
				   const struct igt_atomic_section *p)
{
	struct i915_request *rq;
	struct hang h;
	int err;

	err = __igt_atomic_reset_engine(engine, p, "idle");
	if (err)
		return err;

	err = hang_init(&h, engine->gt);
	if (err)
		return err;

	rq = hang_create_request(&h, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	if (wait_until_running(&h, rq)) {
		err = __igt_atomic_reset_engine(engine, p, "active");
	} else {
		pr_err("%s(%s): Failed to start request %llx, at %x\n",
		       __func__, engine->name,
		       rq->fence.seqno, hws_seqno(&h, rq));
		intel_gt_set_wedged(engine->gt);
		err = -EIO;
	}

	if (err == 0) {
		struct intel_wedge_me w;

		/* Wedge the GT if the reset request fails to complete. */
		intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */)
			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (intel_gt_is_wedged(engine->gt))
			err = -EIO;
	}

	i915_request_put(rq);
out:
	hang_fini(&h);
	return err;
}
1631 | ||
f6470c9b | 1632 | static int igt_reset_engines_atomic(void *arg) |
921f3a60 | 1633 | { |
cb823ed9 | 1634 | struct intel_gt *gt = arg; |
f6470c9b | 1635 | const typeof(*igt_atomic_phases) *p; |
921f3a60 CW |
1636 | int err = 0; |
1637 | ||
f6470c9b MW |
1638 | /* Check that the engines resets are usable from atomic context */ |
1639 | ||
260e6b71 | 1640 | if (!intel_has_reset_engine(gt)) |
f6470c9b MW |
1641 | return 0; |
1642 | ||
cb823ed9 | 1643 | if (USES_GUC_SUBMISSION(gt->i915)) |
f6470c9b | 1644 | return 0; |
921f3a60 | 1645 | |
cb823ed9 | 1646 | igt_global_reset_lock(gt); |
921f3a60 CW |
1647 | |
1648 | /* Flush any requests before we get started and check basics */ | |
cb823ed9 | 1649 | if (!igt_force_reset(gt)) |
921f3a60 CW |
1650 | goto unlock; |
1651 | ||
f6470c9b | 1652 | for (p = igt_atomic_phases; p->name; p++) { |
921f3a60 CW |
1653 | struct intel_engine_cs *engine; |
1654 | enum intel_engine_id id; | |
1655 | ||
5d904e3c | 1656 | for_each_engine(engine, gt, id) { |
f6470c9b MW |
1657 | err = igt_atomic_reset_engine(engine, p); |
1658 | if (err) | |
1659 | goto out; | |
921f3a60 CW |
1660 | } |
1661 | } | |
1662 | ||
1663 | out: | |
1664 | /* As we poke around the guts, do a full reset before continuing. */ | |
cb823ed9 | 1665 | igt_force_reset(gt); |
921f3a60 | 1666 | unlock: |
cb823ed9 | 1667 | igt_global_reset_unlock(gt); |
921f3a60 CW |
1668 | |
1669 | return err; | |
1670 | } | |
1671 | ||
496b575e CW |
1672 | int intel_hangcheck_live_selftests(struct drm_i915_private *i915) |
1673 | { | |
1674 | static const struct i915_subtest tests[] = { | |
1675 | SUBTEST(igt_hang_sanitycheck), | |
36837510 CW |
1676 | SUBTEST(igt_reset_nop), |
1677 | SUBTEST(igt_reset_nop_engine), | |
3fb04cb0 CW |
1678 | SUBTEST(igt_reset_idle_engine), |
1679 | SUBTEST(igt_reset_active_engine), | |
a90507d6 | 1680 | SUBTEST(igt_reset_engines), |
f6470c9b | 1681 | SUBTEST(igt_reset_engines_atomic), |
496b575e | 1682 | SUBTEST(igt_reset_queue), |
eb5f43d4 CW |
1683 | SUBTEST(igt_reset_wait), |
1684 | SUBTEST(igt_reset_evict_ggtt), | |
1685 | SUBTEST(igt_reset_evict_ppgtt), | |
6dc17d69 | 1686 | SUBTEST(igt_reset_evict_fence), |
41533940 | 1687 | SUBTEST(igt_handle_error), |
496b575e | 1688 | }; |
cb823ed9 | 1689 | struct intel_gt *gt = &i915->gt; |
c9d08cc3 | 1690 | intel_wakeref_t wakeref; |
ff97d3ae | 1691 | int err; |
496b575e | 1692 | |
260e6b71 | 1693 | if (!intel_has_gpu_reset(gt)) |
496b575e CW |
1694 | return 0; |
1695 | ||
cb823ed9 | 1696 | if (intel_gt_is_wedged(gt)) |
c4e4f454 CW |
1697 | return -EIO; /* we're long past hope of a successful reset */ |
1698 | ||
cd6a8513 | 1699 | wakeref = intel_runtime_pm_get(gt->uncore->rpm); |
ff97d3ae | 1700 | |
cb823ed9 | 1701 | err = intel_gt_live_subtests(tests, gt); |
ff97d3ae | 1702 | |
cd6a8513 | 1703 | intel_runtime_pm_put(gt->uncore->rpm, wakeref); |
ff97d3ae CW |
1704 | |
1705 | return err; | |
496b575e | 1706 | } |