Commit | Line | Data |
---|---|---|
a72ce6f8 JZ |
1 | /* |
2 | * Copyright 2015 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
a72ce6f8 | 22 | */ |
1b1f42d8 | 23 | |
2d33948e ND |
24 | /** |
25 | * DOC: Overview | |
26 | * | |
27 | * The GPU scheduler provides entities which allow userspace to push jobs | |
28 | * into software queues which are then scheduled on a hardware run queue. | |
29 | * The software queues have a priority among them. The scheduler selects the entities | |
30 | * from the run queue using a FIFO. The scheduler provides dependency handling | |
31 | * features among jobs. The driver is supposed to provide callback functions for | |
32 | * backend operations to the scheduler like submitting a job to hardware run queue, | |
33 | * returning the dependencies of a job etc. | |
34 | * | |
35 | * The organisation of the scheduler is the following: | |
36 | * | |
37 | * 1. Each hw run queue has one scheduler | |
38 | * 2. Each scheduler has multiple run queues with different priorities | |
39 | * (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL) | 
40 | * 3. Each scheduler run queue has a queue of entities to schedule | |
41 | * 4. Entities themselves maintain a queue of jobs that will be scheduled on | |
42 | * the hardware. | |
43 | * | |
44 | * The jobs in an entity are always scheduled in the order in which they were pushed. | 
45 | */ | |
46 | ||
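To make the organisation above concrete, here is a minimal driver-side sketch of how these pieces are usually wired together. All `my_*` names are hypothetical; the `drm_sched_*` and `drm_sched_entity_*` calls are the ones exported by this file and declared in `gpu_scheduler.h`, and the `drm_sched_init()` signature matches the definition at the bottom of this file.

```c
#include <drm/gpu_scheduler.h>

/* Hypothetical per-ring driver state: one scheduler per HW run queue and
 * one entity (software queue) that feeds it.
 */
struct my_ring {
	struct drm_gpu_scheduler sched;
	struct drm_sched_entity entity;
};

static struct dma_fence *my_run_job(struct drm_sched_job *sched_job)
{
	/* Submit the job to the HW ring and return its HW fence. */
	return NULL;
}

static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *sched_job)
{
	/* Reset/recovery; see the fuller TDR sketch later in this file. */
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void my_free_job(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);
}

static const struct drm_sched_backend_ops my_sched_ops = {
	.run_job	= my_run_job,
	.timedout_job	= my_timedout_job,
	.free_job	= my_free_job,
};

static int my_ring_init(struct my_ring *ring)
{
	struct drm_gpu_scheduler *sched_list[] = { &ring->sched };
	int ret;

	/* 1. One scheduler per HW run queue, 64 jobs in flight, 5s timeout. */
	ret = drm_sched_init(&ring->sched, &my_sched_ops, 64, 0,
			     msecs_to_jiffies(5000), NULL, NULL, "my-ring");
	if (ret)
		return ret;

	/* 3./4. One entity whose job queue is scheduled onto that run queue. */
	return drm_sched_entity_init(&ring->entity, DRM_SCHED_PRIORITY_NORMAL,
				     sched_list, ARRAY_SIZE(sched_list), NULL);
}
```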
a72ce6f8 JZ |
47 | #include <linux/kthread.h> |
48 | #include <linux/wait.h> | |
49 | #include <linux/sched.h> | |
83a7772b | 50 | #include <linux/completion.h> |
dbe48d03 | 51 | #include <linux/dma-resv.h> |
ae7e81c0 | 52 | #include <uapi/linux/sched/types.h> |
7c1be93c SR |
53 | |
54 | #include <drm/drm_print.h> | |
dbe48d03 | 55 | #include <drm/drm_gem.h> |
1b1f42d8 LS |
56 | #include <drm/gpu_scheduler.h> |
57 | #include <drm/spsc_queue.h> | |
83f4b118 | 58 | |
353da3c5 | 59 | #define CREATE_TRACE_POINTS |
a70cdb9e | 60 | #include "gpu_scheduler_trace.h" |
353da3c5 | 61 | |
1b1f42d8 LS |
62 | #define to_drm_sched_job(sched_job) \ |
63 | container_of((sched_job), struct drm_sched_job, queue_node) | |
83f4b118 | 64 | |
2d33948e ND |
65 | /** |
66 | * drm_sched_rq_init - initialize a given run queue struct | |
67 | * | |
26b5cf49 | 68 | * @sched: scheduler instance to associate with this run queue |
2d33948e ND |
69 | * @rq: scheduler run queue |
70 | * | |
71 | * Initializes a scheduler runqueue. | |
72 | */ | |
8dc9fbbf ND |
73 | static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, |
74 | struct drm_sched_rq *rq) | |
a72ce6f8 | 75 | { |
2b184d8d | 76 | spin_lock_init(&rq->lock); |
432a4ff8 | 77 | INIT_LIST_HEAD(&rq->entities); |
432a4ff8 | 78 | rq->current_entity = NULL; |
8dc9fbbf | 79 | rq->sched = sched; |
a72ce6f8 JZ |
80 | } |
81 | ||
2d33948e ND |
82 | /** |
83 | * drm_sched_rq_add_entity - add an entity | |
84 | * | |
85 | * @rq: scheduler run queue | |
86 | * @entity: scheduler entity | |
87 | * | |
88 | * Adds a scheduler entity to the run queue. | |
89 | */ | |
620e762f CK |
90 | void drm_sched_rq_add_entity(struct drm_sched_rq *rq, |
91 | struct drm_sched_entity *entity) | |
a72ce6f8 | 92 | { |
e8deea2d CZ |
93 | if (!list_empty(&entity->list)) |
94 | return; | |
2b184d8d | 95 | spin_lock(&rq->lock); |
f2f12eb9 | 96 | atomic_inc(rq->sched->score); |
432a4ff8 | 97 | list_add_tail(&entity->list, &rq->entities); |
2b184d8d | 98 | spin_unlock(&rq->lock); |
a72ce6f8 JZ |
99 | } |
100 | ||
2d33948e ND |
101 | /** |
102 | * drm_sched_rq_remove_entity - remove an entity | |
103 | * | |
104 | * @rq: scheduler run queue | |
105 | * @entity: scheduler entity | |
106 | * | |
107 | * Removes a scheduler entity from the run queue. | |
108 | */ | |
620e762f CK |
109 | void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, |
110 | struct drm_sched_entity *entity) | |
a72ce6f8 | 111 | { |
e8deea2d CZ |
112 | if (list_empty(&entity->list)) |
113 | return; | |
2b184d8d | 114 | spin_lock(&rq->lock); |
f2f12eb9 | 115 | atomic_dec(rq->sched->score); |
432a4ff8 CK |
116 | list_del_init(&entity->list); |
117 | if (rq->current_entity == entity) | |
118 | rq->current_entity = NULL; | |
2b184d8d | 119 | spin_unlock(&rq->lock); |
a72ce6f8 JZ |
120 | } |
121 | ||
122 | /** | |
2d33948e | 123 | * drm_sched_rq_select_entity - Select an entity which could provide a job to run |
3d651936 | 124 | * |
2d33948e | 125 | * @rq: scheduler run queue to check. |
3d651936 CK |
126 | * |
127 | * Try to find a ready entity, returns NULL if none found. | |
a72ce6f8 | 128 | */ |
1b1f42d8 LS |
129 | static struct drm_sched_entity * |
130 | drm_sched_rq_select_entity(struct drm_sched_rq *rq) | |
a72ce6f8 | 131 | { |
1b1f42d8 | 132 | struct drm_sched_entity *entity; |
432a4ff8 | 133 | |
2b184d8d CK |
134 | spin_lock(&rq->lock); |
135 | ||
136 | entity = rq->current_entity; | |
432a4ff8 CK |
137 | if (entity) { |
138 | list_for_each_entry_continue(entity, &rq->entities, list) { | |
1b1f42d8 | 139 | if (drm_sched_entity_is_ready(entity)) { |
432a4ff8 | 140 | rq->current_entity = entity; |
83a7772b | 141 | reinit_completion(&entity->entity_idle); |
2b184d8d | 142 | spin_unlock(&rq->lock); |
3d651936 | 143 | return entity; |
432a4ff8 | 144 | } |
a72ce6f8 | 145 | } |
a72ce6f8 | 146 | } |
a72ce6f8 | 147 | |
432a4ff8 | 148 | list_for_each_entry(entity, &rq->entities, list) { |
a72ce6f8 | 149 | |
1b1f42d8 | 150 | if (drm_sched_entity_is_ready(entity)) { |
432a4ff8 | 151 | rq->current_entity = entity; |
83a7772b | 152 | reinit_completion(&entity->entity_idle); |
2b184d8d | 153 | spin_unlock(&rq->lock); |
3d651936 | 154 | return entity; |
432a4ff8 | 155 | } |
a72ce6f8 | 156 | |
432a4ff8 CK |
157 | if (entity == rq->current_entity) |
158 | break; | |
159 | } | |
a72ce6f8 | 160 | |
2b184d8d CK |
161 | spin_unlock(&rq->lock); |
162 | ||
432a4ff8 | 163 | return NULL; |
a72ce6f8 JZ |
164 | } |
165 | ||
71173e78 LT |
166 | /** |
167 | * drm_sched_job_done - complete a job | |
168 | * @s_job: pointer to the job which is done | |
169 | * | |
170 | * Finish the job's fence and wake up the worker thread. | |
171 | */ | |
172 | static void drm_sched_job_done(struct drm_sched_job *s_job) | |
173 | { | |
174 | struct drm_sched_fence *s_fence = s_job->s_fence; | |
175 | struct drm_gpu_scheduler *sched = s_fence->sched; | |
176 | ||
177 | atomic_dec(&sched->hw_rq_count); | |
f2f12eb9 | 178 | atomic_dec(sched->score); |
71173e78 LT |
179 | |
180 | trace_drm_sched_process_job(s_fence); | |
181 | ||
182 | dma_fence_get(&s_fence->finished); | |
183 | drm_sched_fence_finished(s_fence); | |
184 | dma_fence_put(&s_fence->finished); | |
185 | wake_up_interruptible(&sched->wake_up_worker); | |
186 | } | |
187 | ||
188 | /** | |
189 | * drm_sched_job_done_cb - the callback for a done job | |
190 | * @f: fence | |
191 | * @cb: fence callbacks | |
192 | */ | |
193 | static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb) | |
194 | { | |
195 | struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb); | |
196 | ||
197 | drm_sched_job_done(s_job); | |
198 | } | |
199 | ||
2d33948e ND |
200 | /** |
201 | * drm_sched_dependency_optimized - test if the dependency can be optimized |
202 | * | |
203 | * @fence: the dependency fence | |
204 | * @entity: the entity which depends on the above fence | |
205 | * | |
206 | * Returns true if the dependency can be optimized and false otherwise | |
207 | */ | |
1b1f42d8 LS |
208 | bool drm_sched_dependency_optimized(struct dma_fence *fence, |
209 | struct drm_sched_entity *entity) | |
30514dec | 210 | { |
068c3304 | 211 | struct drm_gpu_scheduler *sched = entity->rq->sched; |
1b1f42d8 | 212 | struct drm_sched_fence *s_fence; |
30514dec CZ |
213 | |
214 | if (!fence || dma_fence_is_signaled(fence)) | |
215 | return false; | |
216 | if (fence->context == entity->fence_context) | |
217 | return true; | |
1b1f42d8 | 218 | s_fence = to_drm_sched_fence(fence); |
30514dec CZ |
219 | if (s_fence && s_fence->sched == sched) |
220 | return true; | |
221 | ||
222 | return false; | |
223 | } | |
1b1f42d8 | 224 | EXPORT_SYMBOL(drm_sched_dependency_optimized); |
30514dec | 225 | |
b981c86f CK |
226 | /** |
227 | * drm_sched_start_timeout - start timeout for reset worker | |
228 | * | |
229 | * @sched: scheduler instance to start the worker for | |
230 | * | |
231 | * Start the timeout for the given scheduler. | |
232 | */ | |
233 | static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) | |
234 | { | |
235 | if (sched->timeout != MAX_SCHEDULE_TIMEOUT && | |
6efa4b46 | 236 | !list_empty(&sched->pending_list)) |
78efe21b | 237 | queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); |
b981c86f CK |
238 | } |
239 | ||
8fe159b0 CK |
240 | /** |
241 | * drm_sched_fault - immediately start timeout handler | |
242 | * | |
243 | * @sched: scheduler where the timeout handling should be started. | |
244 | * | |
245 | * Start timeout handling immediately when the driver detects a hardware fault. | |
246 | */ | |
247 | void drm_sched_fault(struct drm_gpu_scheduler *sched) | |
248 | { | |
78efe21b | 249 | mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); |
8fe159b0 CK |
250 | } |
251 | EXPORT_SYMBOL(drm_sched_fault); | |
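A hedged illustration: a driver might call this from its fault interrupt handler instead of waiting for the regular job timeout to elapse. The IRQ wiring and the `my_ring` wrapper from the setup sketch above are hypothetical.

```c
#include <linux/interrupt.h>

/* Hypothetical fault IRQ handler: a hardware fault was signalled, so start
 * the timeout/reset handling for the affected scheduler immediately.
 */
static irqreturn_t my_fault_irq_handler(int irq, void *data)
{
	struct my_ring *ring = data;

	drm_sched_fault(&ring->sched);
	return IRQ_HANDLED;
}
```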
252 | ||
1db8c142 SM |
253 | /** |
254 | * drm_sched_suspend_timeout - Suspend scheduler job timeout | |
255 | * | |
256 | * @sched: scheduler instance for which to suspend the timeout | |
257 | * | |
258 | * Suspend the delayed work timeout for the scheduler. This is done by | |
259 | * modifying the delayed work timeout to an arbitrarily large value, | 
a7fbb630 | 260 | * MAX_SCHEDULE_TIMEOUT in this case. |
1db8c142 SM |
261 | * |
262 | * Returns the timeout remaining | |
263 | * | |
264 | */ | |
265 | unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched) | |
266 | { | |
267 | unsigned long sched_timeout, now = jiffies; | |
268 | ||
269 | sched_timeout = sched->work_tdr.timer.expires; | |
270 | ||
271 | /* | |
272 | * Modify the timeout to an arbitrarily large value. This also prevents | |
273 | * the timeout from being restarted when new submissions arrive | 
274 | */ | |
78efe21b | 275 | if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) |
1db8c142 SM |
276 | && time_after(sched_timeout, now)) |
277 | return sched_timeout - now; | |
278 | else | |
279 | return sched->timeout; | |
280 | } | |
281 | EXPORT_SYMBOL(drm_sched_suspend_timeout); | |
282 | ||
283 | /** | |
284 | * drm_sched_resume_timeout - Resume scheduler job timeout | |
285 | * | |
286 | * @sched: scheduler instance for which to resume the timeout | |
287 | * @remaining: remaining timeout | |
288 | * | |
a7fbb630 | 289 | * Resume the delayed work timeout for the scheduler. |
1db8c142 SM |
290 | */ |
291 | void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, | |
292 | unsigned long remaining) | |
293 | { | |
a7fbb630 | 294 | spin_lock(&sched->job_list_lock); |
1db8c142 | 295 | |
6efa4b46 | 296 | if (list_empty(&sched->pending_list)) |
1db8c142 SM |
297 | cancel_delayed_work(&sched->work_tdr); |
298 | else | |
78efe21b | 299 | mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining); |
1db8c142 | 300 | |
a7fbb630 | 301 | spin_unlock(&sched->job_list_lock); |
1db8c142 SM |
302 | } |
303 | EXPORT_SYMBOL(drm_sched_resume_timeout); | |
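A hedged usage sketch: a driver that stalls its HW queue (for example around preemption or power gating) can bracket the stall with the suspend/resume pair above so that in-flight jobs do not trip the timeout while the hardware is paused. The `my_ring` wrapper and its `tdr_remaining` field are hypothetical.

```c
/* Hypothetical pause/unpause pair around a HW queue stall. */
static void my_ring_pause(struct my_ring *ring)
{
	/* Remember how much of the timeout was left when we paused. */
	ring->tdr_remaining = drm_sched_suspend_timeout(&ring->sched);

	/* ... stall the HW queue (preempt, power gate, ...) ... */
}

static void my_ring_unpause(struct my_ring *ring)
{
	/* ... restart the HW queue ... */

	/* Re-arm the timeout with whatever was left before the pause. */
	drm_sched_resume_timeout(&ring->sched, ring->tdr_remaining);
}
```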
304 | ||
1b1f42d8 | 305 | static void drm_sched_job_begin(struct drm_sched_job *s_job) |
0de2479c | 306 | { |
1b1f42d8 | 307 | struct drm_gpu_scheduler *sched = s_job->sched; |
0de2479c | 308 | |
a7fbb630 | 309 | spin_lock(&sched->job_list_lock); |
6efa4b46 | 310 | list_add_tail(&s_job->list, &sched->pending_list); |
b981c86f | 311 | drm_sched_start_timeout(sched); |
a7fbb630 | 312 | spin_unlock(&sched->job_list_lock); |
0de2479c ML |
313 | } |
314 | ||
1b1f42d8 | 315 | static void drm_sched_job_timedout(struct work_struct *work) |
0e51a772 | 316 | { |
6a962430 ND |
317 | struct drm_gpu_scheduler *sched; |
318 | struct drm_sched_job *job; | |
75973e58 | 319 | enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; |
6a962430 ND |
320 | |
321 | sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); | |
135517d3 AG |
322 | |
323 | /* Protects against concurrent deletion in drm_sched_get_cleanup_job */ | |
a7fbb630 | 324 | spin_lock(&sched->job_list_lock); |
6efa4b46 | 325 | job = list_first_entry_or_null(&sched->pending_list, |
8935ff00 | 326 | struct drm_sched_job, list); |
0e51a772 | 327 | |
b576ff90 | 328 | if (job) { |
135517d3 AG |
329 | /* |
330 | * Remove the bad job so it cannot be freed by concurrent | |
331 | * drm_sched_get_cleanup_job(). It will be reinserted after sched->thread | 
332 | * is parked, at which point it's safe. | 
333 | */ | |
8935ff00 | 334 | list_del_init(&job->list); |
a7fbb630 | 335 | spin_unlock(&sched->job_list_lock); |
135517d3 | 336 | |
75973e58 | 337 | status = job->sched->ops->timedout_job(job); |
0efd2d2f | 338 | |
b576ff90 AG |
339 | /* |
340 | * The guilty job did complete and hence needs to be manually removed. | 
341 | * See the drm_sched_stop() documentation. | 
342 | */ | |
343 | if (sched->free_guilty) { | |
344 | job->sched->ops->free_job(job); | |
345 | sched->free_guilty = false; | |
346 | } | |
135517d3 | 347 | } else { |
a7fbb630 | 348 | spin_unlock(&sched->job_list_lock); |
a5343b8a | 349 | } |
5918045c | 350 | |
75973e58 AG |
351 | if (status != DRM_GPU_SCHED_STAT_ENODEV) { |
352 | spin_lock(&sched->job_list_lock); | |
353 | drm_sched_start_timeout(sched); | |
354 | spin_unlock(&sched->job_list_lock); | |
355 | } | |
0e51a772 CK |
356 | } |
357 | ||
222b5f04 AG |
358 | /** |
359 | * drm_sched_increase_karma - Update sched_entity guilty flag | |
360 | * | |
361 | * @bad: The job guilty of time out | |
362 | * | |
363 | * Increment on every hang caused by the 'bad' job. If this exceeds the hang | |
364 | * limit of the scheduler, then the respective sched entity is marked guilty and | 
365 | * jobs from it will not be scheduled any further. | 
366 | */ | |
367 | void drm_sched_increase_karma(struct drm_sched_job *bad) | |
368 | { | |
e6c6338f | 369 | drm_sched_increase_karma_ext(bad, 1); |
222b5f04 AG |
370 | } |
371 | EXPORT_SYMBOL(drm_sched_increase_karma); | |
372 | ||
e6c6338f JZ |
373 | void drm_sched_reset_karma(struct drm_sched_job *bad) |
374 | { | |
375 | drm_sched_increase_karma_ext(bad, 0); | |
376 | } | |
377 | EXPORT_SYMBOL(drm_sched_reset_karma); | |
378 | ||
2d33948e | 379 | /** |
f5d35632 | 380 | * drm_sched_stop - stop the scheduler |
2d33948e ND |
381 | * |
382 | * @sched: scheduler instance | |
d0f29d49 | 383 | * @bad: job which caused the time out |
2d33948e | 384 | * |
5918045c CK |
385 | * Stop the scheduler and also remove and free all completed jobs. |
386 | * Note: the bad job will not be freed, as it might be used later, so it is | |
387 | * the caller's responsibility to release it manually if it is no longer part | |
6efa4b46 | 388 | * of the pending list. |
5918045c | 389 | * |
2d33948e | 390 | */ |
5918045c | 391 | void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) |
e686e75d | 392 | { |
5918045c | 393 | struct drm_sched_job *s_job, *tmp; |
e686e75d | 394 | |
222b5f04 AG |
395 | kthread_park(sched->thread); |
396 | ||
135517d3 AG |
397 | /* |
398 | * Reinsert back the bad job here - now it's safe as | |
399 | * drm_sched_get_cleanup_job cannot race against us and release the | |
400 | * bad job at this point - we parked (waited for) any in progress | |
401 | * (earlier) cleanups and drm_sched_get_cleanup_job will not be called | |
402 | * now until the scheduler thread is unparked. | |
403 | */ | |
404 | if (bad && bad->sched == sched) | |
405 | /* | |
406 | * Add at the head of the queue to reflect it was the earliest | |
407 | * job extracted. | |
408 | */ | |
6efa4b46 | 409 | list_add(&bad->list, &sched->pending_list); |
135517d3 | 410 | |
222b5f04 | 411 | /* |
5918045c | 412 | * Iterate the job list from the last to the first one and either deactivate | 
6efa4b46 | 413 | * their HW callbacks or remove them from the pending list if they have already | 
5918045c CK |
414 | * signaled. |
415 | * This iteration is thread safe as sched thread is stopped. | |
222b5f04 | 416 | */ |
6efa4b46 | 417 | list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list, |
8935ff00 | 418 | list) { |
a6bef67e CZ |
419 | if (s_job->s_fence->parent && |
420 | dma_fence_remove_callback(s_job->s_fence->parent, | |
3741540e | 421 | &s_job->cb)) { |
65781c78 | 422 | atomic_dec(&sched->hw_rq_count); |
222b5f04 | 423 | } else { |
5918045c | 424 | /* |
6efa4b46 | 425 | * remove job from pending_list. |
5918045c CK |
426 | * Locking here is for concurrent resume timeout |
427 | */ | |
a7fbb630 | 428 | spin_lock(&sched->job_list_lock); |
8935ff00 | 429 | list_del_init(&s_job->list); |
a7fbb630 | 430 | spin_unlock(&sched->job_list_lock); |
5918045c CK |
431 | |
432 | /* | |
433 | * Wait for job's HW fence callback to finish using s_job | |
434 | * before releasing it. | |
435 | * | |
436 | * Job is still alive so fence refcount at least 1 | |
437 | */ | |
438 | dma_fence_wait(&s_job->s_fence->finished, false); | |
439 | ||
440 | /* | |
441 | * We must keep bad job alive for later use during | |
a5343b8a AG |
442 | * recovery by some of the drivers but leave a hint |
443 | * that the guilty job must be released. | |
5918045c CK |
444 | */ |
445 | if (bad != s_job) | |
446 | sched->ops->free_job(s_job); | |
a5343b8a AG |
447 | else |
448 | sched->free_guilty = true; | |
e686e75d CZ |
449 | } |
450 | } | |
290764af AG |
451 | |
452 | /* | |
453 | * Stop the pending timer in flight as we rearm it in drm_sched_start. This | 
454 | * prevents the pending timeout work in progress from firing right away after | 
455 | * this TDR finished and before the newly restarted jobs have had a | 
456 | * chance to complete. | |
457 | */ | |
458 | cancel_delayed_work(&sched->work_tdr); | |
65781c78 | 459 | } |
222b5f04 AG |
460 | |
461 | EXPORT_SYMBOL(drm_sched_stop); | |
65781c78 | 462 | |
2d33948e | 463 | /** |
e9d2871f | 464 | * drm_sched_start - recover jobs after a reset |
2d33948e ND |
465 | * |
466 | * @sched: scheduler instance | |
d0f29d49 | 467 | * @full_recovery: proceed with complete sched restart |
2d33948e ND |
468 | * |
469 | */ | |
222b5f04 | 470 | void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) |
ec75f573 | 471 | { |
1b1f42d8 | 472 | struct drm_sched_job *s_job, *tmp; |
ec75f573 CZ |
473 | int r; |
474 | ||
3741540e AG |
475 | /* |
476 | * Locking the list is not required here as the sched thread is parked | |
5918045c | 477 | * so no new jobs are being inserted or removed. Also concurrent |
3741540e AG |
478 | * GPU recoveries can't run in parallel. | 
479 | */ | |
6efa4b46 | 480 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
222b5f04 | 481 | struct dma_fence *fence = s_job->s_fence->parent; |
8ee3a52e | 482 | |
5918045c CK |
483 | atomic_inc(&sched->hw_rq_count); |
484 | ||
485 | if (!full_recovery) | |
486 | continue; | |
487 | ||
ec75f573 | 488 | if (fence) { |
3741540e | 489 | r = dma_fence_add_callback(fence, &s_job->cb, |
71173e78 | 490 | drm_sched_job_done_cb); |
ec75f573 | 491 | if (r == -ENOENT) |
71173e78 | 492 | drm_sched_job_done(s_job); |
ec75f573 CZ |
493 | else if (r) |
494 | DRM_ERROR("fence add callback failed (%d)\n", | |
495 | r); | |
222b5f04 | 496 | } else |
71173e78 | 497 | drm_sched_job_done(s_job); |
ec75f573 | 498 | } |
222b5f04 | 499 | |
5918045c | 500 | if (full_recovery) { |
a7fbb630 | 501 | spin_lock(&sched->job_list_lock); |
5918045c | 502 | drm_sched_start_timeout(sched); |
a7fbb630 | 503 | spin_unlock(&sched->job_list_lock); |
5918045c | 504 | } |
222b5f04 | 505 | |
222b5f04 | 506 | kthread_unpark(sched->thread); |
ec75f573 | 507 | } |
222b5f04 AG |
508 | EXPORT_SYMBOL(drm_sched_start); |
509 | ||
510 | /** | |
a6a1f036 | 511 | * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list |
222b5f04 AG |
512 | * |
513 | * @sched: scheduler instance | |
514 | * | |
515 | */ | |
516 | void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) | |
e6c6338f JZ |
517 | { |
518 | drm_sched_resubmit_jobs_ext(sched, INT_MAX); | |
519 | } | |
520 | EXPORT_SYMBOL(drm_sched_resubmit_jobs); | |
521 | ||
522 | /** | |
523 | * drm_sched_resubmit_jobs_ext - helper to relaunch a certain number of jobs from the pending list | 
524 | * | |
525 | * @sched: scheduler instance | |
526 | * @max: maximum number of jobs to relaunch | 
527 | * | |
528 | */ | |
529 | void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max) | |
222b5f04 AG |
530 | { |
531 | struct drm_sched_job *s_job, *tmp; | |
532 | uint64_t guilty_context; | |
533 | bool found_guilty = false; | |
e91e5f08 | 534 | struct dma_fence *fence; |
e6c6338f | 535 | int i = 0; |
222b5f04 | 536 | |
6efa4b46 | 537 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
222b5f04 AG |
538 | struct drm_sched_fence *s_fence = s_job->s_fence; |
539 | ||
e6c6338f JZ |
540 | if (i >= max) |
541 | break; | |
542 | ||
222b5f04 AG |
543 | if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) { |
544 | found_guilty = true; | |
545 | guilty_context = s_job->s_fence->scheduled.context; | |
546 | } | |
547 | ||
548 | if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) | |
549 | dma_fence_set_error(&s_fence->finished, -ECANCELED); | |
550 | ||
290764af | 551 | dma_fence_put(s_job->s_fence->parent); |
e91e5f08 | 552 | fence = sched->ops->run_job(s_job); |
e6c6338f | 553 | i++; |
e91e5f08 AG |
554 | |
555 | if (IS_ERR_OR_NULL(fence)) { | |
d7c5782a AG |
556 | if (IS_ERR(fence)) |
557 | dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); | |
558 | ||
e91e5f08 | 559 | s_job->s_fence->parent = NULL; |
e91e5f08 AG |
560 | } else { |
561 | s_job->s_fence->parent = fence; | |
562 | } | |
222b5f04 AG |
563 | } |
564 | } | |
e6c6338f | 565 | EXPORT_SYMBOL(drm_sched_resubmit_jobs_ext); |
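Taken together, drm_sched_stop(), drm_sched_increase_karma(), drm_sched_resubmit_jobs() and drm_sched_start() form the usual recovery sequence. Below is a hedged sketch of a driver's &drm_sched_backend_ops.timedout_job callback built from these helpers, expanding the stub from the setup sketch near the top of the file; my_hw_reset() and the my_ring wrapper are hypothetical.

```c
/* Hypothetical engine reset; the details are device specific. */
static void my_hw_reset(struct my_ring *ring)
{
}

static enum drm_gpu_sched_stat my_timedout_job(struct drm_sched_job *bad)
{
	struct my_ring *ring = container_of(bad->sched, struct my_ring, sched);

	/* Park the scheduler thread and prune completed jobs; @bad survives. */
	drm_sched_stop(&ring->sched, bad);

	/* Mark the offending entity guilty once it exceeds the hang limit. */
	drm_sched_increase_karma(bad);

	/* Reset the hardware engine behind this scheduler. */
	my_hw_reset(ring);

	/* Re-run everything left on the pending list... */
	drm_sched_resubmit_jobs(&ring->sched);

	/* ...then unpark the scheduler thread and rearm the timeout. */
	drm_sched_start(&ring->sched, true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}
```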
ec75f573 | 566 | |
563e1e66 | 567 | /** |
2d33948e | 568 | * drm_sched_job_init - init a scheduler job |
2d33948e | 569 | * @job: scheduler job to init |
2d33948e ND |
570 | * @entity: scheduler entity to use |
571 | * @owner: job owner for debugging | |
572 | * | |
573 | * Refer to drm_sched_entity_push_job() documentation | |
563e1e66 | 574 | * for locking considerations. |
2d33948e | 575 | * |
dbe48d03 DV |
576 | * Drivers must make sure to call drm_sched_job_cleanup() if this function returns | 
577 | * successfully, even when @job is aborted before drm_sched_job_arm() is called. | |
578 | * | |
579 | * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware | |
580 | * has died, which can mean that there's no valid runqueue for @entity. | 
581 | * This function returns -ENOENT in this case (which probably should be -EIO as | |
582 | * a more meaningful return value). | 
583 | * | |
2d33948e | 584 | * Returns 0 for success, negative error code otherwise. |
563e1e66 | 585 | */ |
1b1f42d8 | 586 | int drm_sched_job_init(struct drm_sched_job *job, |
1b1f42d8 | 587 | struct drm_sched_entity *entity, |
595a9cd6 | 588 | void *owner) |
e686941a | 589 | { |
35e160e7 | 590 | drm_sched_entity_select_rq(entity); |
faf6e1a8 AG |
591 | if (!entity->rq) |
592 | return -ENOENT; | |
593 | ||
8ee3a52e | 594 | job->entity = entity; |
dbe48d03 | 595 | job->s_fence = drm_sched_fence_alloc(entity, owner); |
e686941a ML |
596 | if (!job->s_fence) |
597 | return -ENOMEM; | |
598 | ||
8935ff00 | 599 | INIT_LIST_HEAD(&job->list); |
4835096b | 600 | |
ebd5f742 DV |
601 | xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); |
602 | ||
e686941a ML |
603 | return 0; |
604 | } | |
1b1f42d8 | 605 | EXPORT_SYMBOL(drm_sched_job_init); |
e686941a | 606 | |
26efecf9 | 607 | /** |
dbe48d03 DV |
608 | * drm_sched_job_arm - arm a scheduler job for execution |
609 | * @job: scheduler job to arm | |
610 | * | |
611 | * This arms a scheduler job for execution. Specifically it initializes the | |
612 | * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv | |
613 | * or other places that need to track the completion of this job. | |
614 | * | |
615 | * Refer to drm_sched_entity_push_job() documentation for locking | |
616 | * considerations. | |
26efecf9 | 617 | * |
dbe48d03 DV |
618 | * This can only be called if drm_sched_job_init() succeeded. |
619 | */ | |
620 | void drm_sched_job_arm(struct drm_sched_job *job) | |
621 | { | |
622 | struct drm_gpu_scheduler *sched; | |
623 | struct drm_sched_entity *entity = job->entity; | |
624 | ||
625 | BUG_ON(!entity); | |
626 | ||
627 | sched = entity->rq->sched; | |
628 | ||
629 | job->sched = sched; | |
630 | job->s_priority = entity->rq - sched->sched_rq; | |
631 | job->id = atomic64_inc_return(&sched->job_id_count); | |
632 | ||
633 | drm_sched_fence_init(job->s_fence, job->entity); | |
634 | } | |
635 | EXPORT_SYMBOL(drm_sched_job_arm); | |
636 | ||
ebd5f742 DV |
637 | /** |
638 | * drm_sched_job_add_dependency - adds the fence as a job dependency | |
639 | * @job: scheduler job to add the dependencies to | |
640 | * @fence: the dma_fence to add to the list of dependencies. | |
641 | * | |
642 | * Note that @fence is consumed in both the success and error cases. | |
643 | * | |
644 | * Returns: | |
645 | * 0 on success, or an error on failing to expand the array. | |
646 | */ | |
647 | int drm_sched_job_add_dependency(struct drm_sched_job *job, | |
648 | struct dma_fence *fence) | |
649 | { | |
650 | struct dma_fence *entry; | |
651 | unsigned long index; | |
652 | u32 id = 0; | |
653 | int ret; | |
654 | ||
655 | if (!fence) | |
656 | return 0; | |
657 | ||
658 | /* Deduplicate if we already depend on a fence from the same context. | |
659 | * This lets the size of the array of deps scale with the number of | |
660 | * engines involved, rather than the number of BOs. | |
661 | */ | |
662 | xa_for_each(&job->dependencies, index, entry) { | |
663 | if (entry->context != fence->context) | |
664 | continue; | |
665 | ||
666 | if (dma_fence_is_later(fence, entry)) { | |
667 | dma_fence_put(entry); | |
668 | xa_store(&job->dependencies, index, fence, GFP_KERNEL); | |
669 | } else { | |
670 | dma_fence_put(fence); | |
671 | } | |
672 | return 0; | |
673 | } | |
674 | ||
675 | ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL); | |
676 | if (ret != 0) | |
677 | dma_fence_put(fence); | |
678 | ||
679 | return ret; | |
680 | } | |
681 | EXPORT_SYMBOL(drm_sched_job_add_dependency); | |
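A small hedged example: because the fence reference is consumed even on error, a caller that wants to keep using its own reference takes an extra one before handing the fence over. my_add_wait_fence() is a hypothetical helper name.

```c
/* Make @job wait for an explicit input fence the caller still owns. */
static int my_add_wait_fence(struct drm_sched_job *job,
			     struct dma_fence *in_fence)
{
	/* drm_sched_job_add_dependency() consumes the reference we pass in. */
	return drm_sched_job_add_dependency(job, dma_fence_get(in_fence));
}
```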
682 | ||
683 | /** | |
684 | * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job | |
685 | * dependencies | |
686 | * @job: scheduler job to add the dependencies to | |
687 | * @obj: the gem object to add new dependencies from. | |
688 | * @write: whether the job might write the object (so we need to depend on | |
689 | * shared fences in the reservation object). | |
690 | * | |
691 | * This should be called after drm_gem_lock_reservations() on your array of | |
692 | * GEM objects used in the job but before updating the reservations with your | |
693 | * own fences. | |
694 | * | |
695 | * Returns: | |
696 | * 0 on success, or an error on failing to expand the array. | |
697 | */ | |
698 | int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, | |
699 | struct drm_gem_object *obj, | |
700 | bool write) | |
701 | { | |
9c2ba265 CK |
702 | struct dma_resv_iter cursor; |
703 | struct dma_fence *fence; | |
ebd5f742 | 704 | int ret; |
ebd5f742 | 705 | |
9c2ba265 CK |
706 | dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { |
707 | ret = drm_sched_job_add_dependency(job, fence); | |
ebd5f742 | 708 | if (ret) |
9c2ba265 | 709 | return ret; |
13e9e30c CK |
710 | |
711 | /* Make sure to grab an additional ref on the added fence */ | |
712 | dma_fence_get(fence); | |
ebd5f742 | 713 | } |
9c2ba265 | 714 | return 0; |
ebd5f742 DV |
715 | } |
716 | EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies); | |
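A hedged end-to-end submit sketch tying the job helpers together: init, gather dependencies, arm, then push to the entity. `struct my_job`, the single-BO signature and the `my_ring` wrapper are illustrative only; drm_sched_entity_push_job() is assumed to take just the job, as it does in trees that already have drm_sched_job_arm(), and the BO's reservation is assumed to be locked as required above.

```c
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>

/* Hypothetical driver job embedding the scheduler job. */
struct my_job {
	struct drm_sched_job base;
	/* command stream, driver fences, etc. would live here */
};

static int my_submit(struct my_ring *ring, struct my_job *job,
		     struct drm_gem_object *bo, bool writes_bo)
{
	int ret;

	ret = drm_sched_job_init(&job->base, &ring->entity, NULL);
	if (ret)
		return ret;

	/* Depend on the fences already attached to the BO's reservation. */
	ret = drm_sched_job_add_implicit_dependencies(&job->base, bo, writes_bo);
	if (ret)
		goto err_cleanup;

	/* Point of no return: the finished fence now exists and may be shared. */
	drm_sched_job_arm(&job->base);

	drm_sched_entity_push_job(&job->base);
	return 0;

err_cleanup:
	/* Error unwind before drm_sched_job_arm(), as required by job_init(). */
	drm_sched_job_cleanup(&job->base);
	return ret;
}
```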
717 | ||
718 | ||
dbe48d03 DV |
719 | /** |
720 | * drm_sched_job_cleanup - clean up scheduler job resources | |
26efecf9 | 721 | * @job: scheduler job to clean up |
dbe48d03 DV |
722 | * |
723 | * Cleans up the resources allocated with drm_sched_job_init(). | |
724 | * | |
725 | * Drivers should call this from their error unwind code if @job is aborted | |
726 | * before drm_sched_job_arm() is called. | |
727 | * | |
728 | * After that point of no return @job is committed to be executed by the | |
729 | * scheduler, and this function should be called from the | |
730 | * &drm_sched_backend_ops.free_job callback. | |
26efecf9 SM |
731 | */ |
732 | void drm_sched_job_cleanup(struct drm_sched_job *job) | |
733 | { | |
ebd5f742 DV |
734 | struct dma_fence *fence; |
735 | unsigned long index; | |
736 | ||
dbe48d03 DV |
737 | if (kref_read(&job->s_fence->finished.refcount)) { |
738 | /* drm_sched_job_arm() has been called */ | |
739 | dma_fence_put(&job->s_fence->finished); | |
740 | } else { | |
741 | /* aborted job before committing to run it */ | |
d4c16733 | 742 | drm_sched_fence_free(job->s_fence); |
dbe48d03 DV |
743 | } |
744 | ||
26efecf9 | 745 | job->s_fence = NULL; |
ebd5f742 DV |
746 | |
747 | xa_for_each(&job->dependencies, index, fence) { | |
748 | dma_fence_put(fence); | |
749 | } | |
750 | xa_destroy(&job->dependencies); | |
751 | ||
26efecf9 SM |
752 | } |
753 | EXPORT_SYMBOL(drm_sched_job_cleanup); | |
754 | ||
e688b728 | 755 | /** |
2d33948e ND |
756 | * drm_sched_ready - is the scheduler ready |
757 | * | |
758 | * @sched: scheduler instance | |
759 | * | |
760 | * Return true if we can push more jobs to the hw, otherwise false. | |
e688b728 | 761 | */ |
1b1f42d8 | 762 | static bool drm_sched_ready(struct drm_gpu_scheduler *sched) |
e688b728 CK |
763 | { |
764 | return atomic_read(&sched->hw_rq_count) < | |
765 | sched->hw_submission_limit; | |
766 | } | |
767 | ||
88079006 | 768 | /** |
2d33948e ND |
769 | * drm_sched_wakeup - Wake up the scheduler when it is ready |
770 | * | |
771 | * @sched: scheduler instance | |
772 | * | |
88079006 | 773 | */ |
620e762f | 774 | void drm_sched_wakeup(struct drm_gpu_scheduler *sched) |
88079006 | 775 | { |
1b1f42d8 | 776 | if (drm_sched_ready(sched)) |
c2b6bd7e | 777 | wake_up_interruptible(&sched->wake_up_worker); |
88079006 CK |
778 | } |
779 | ||
e688b728 | 780 | /** |
2d33948e ND |
781 | * drm_sched_select_entity - Select next entity to process |
782 | * | |
783 | * @sched: scheduler instance | |
784 | * | |
785 | * Returns the entity to process or NULL if none are found. | |
786 | */ | |
1b1f42d8 LS |
787 | static struct drm_sched_entity * |
788 | drm_sched_select_entity(struct drm_gpu_scheduler *sched) | |
e688b728 | 789 | { |
1b1f42d8 | 790 | struct drm_sched_entity *entity; |
d033a6de | 791 | int i; |
e688b728 | 792 | |
1b1f42d8 | 793 | if (!drm_sched_ready(sched)) |
e688b728 CK |
794 | return NULL; |
795 | ||
796 | /* Kernel run queue has higher priority than normal run queue */ | 
e2d732fd | 797 | for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { |
1b1f42d8 | 798 | entity = drm_sched_rq_select_entity(&sched->sched_rq[i]); |
d033a6de CZ |
799 | if (entity) |
800 | break; | |
801 | } | |
e688b728 | 802 | |
3d651936 | 803 | return entity; |
e688b728 CK |
804 | } |
805 | ||
5918045c | 806 | /** |
588b9828 | 807 | * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed |
5918045c CK |
808 | * |
809 | * @sched: scheduler instance | |
810 | * | |
6efa4b46 | 811 | * Returns the next finished job from the pending list (if there is one) |
588b9828 | 812 | * ready for it to be destroyed. |
5918045c | 813 | */ |
588b9828 SP |
814 | static struct drm_sched_job * |
815 | drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) | |
5918045c | 816 | { |
1774baa6 | 817 | struct drm_sched_job *job, *next; |
5918045c | 818 | |
a7fbb630 | 819 | spin_lock(&sched->job_list_lock); |
5918045c | 820 | |
6efa4b46 | 821 | job = list_first_entry_or_null(&sched->pending_list, |
8935ff00 | 822 | struct drm_sched_job, list); |
5918045c | 823 | |
588b9828 | 824 | if (job && dma_fence_is_signaled(&job->s_fence->finished)) { |
6efa4b46 | 825 | /* remove job from pending_list */ |
8935ff00 | 826 | list_del_init(&job->list); |
bcf26654 ML |
827 | |
828 | /* cancel this job's TO timer */ | |
829 | cancel_delayed_work(&sched->work_tdr); | |
1774baa6 RS |
830 | /* make the scheduled timestamp more accurate */ |
831 | next = list_first_entry_or_null(&sched->pending_list, | |
832 | typeof(*next), list); | |
bcf26654 ML |
833 | |
834 | if (next) { | |
1774baa6 RS |
835 | next->s_fence->scheduled.timestamp = |
836 | job->s_fence->finished.timestamp; | |
bcf26654 ML |
837 | /* start TO timer for next job */ |
838 | drm_sched_start_timeout(sched); | |
839 | } | |
588b9828 SP |
840 | } else { |
841 | job = NULL; | |
5918045c CK |
842 | } |
843 | ||
a7fbb630 | 844 | spin_unlock(&sched->job_list_lock); |
3741540e | 845 | |
588b9828 | 846 | return job; |
6f0e54a9 CK |
847 | } |
848 | ||
ec2edcc2 ND |
849 | /** |
850 | * drm_sched_pick_best - Get a drm sched from a sched_list with the least load | |
851 | * @sched_list: list of drm_gpu_schedulers | |
852 | * @num_sched_list: number of drm_gpu_schedulers in the sched_list | |
853 | * | |
854 | * Returns pointer of the sched with the least load or NULL if none of the | |
855 | * drm_gpu_schedulers are ready | |
856 | */ | |
857 | struct drm_gpu_scheduler * | |
858 | drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, | |
859 | unsigned int num_sched_list) | |
860 | { | |
861 | struct drm_gpu_scheduler *sched, *picked_sched = NULL; | |
862 | int i; | |
d41a39dd | 863 | unsigned int min_score = UINT_MAX, num_score; |
ec2edcc2 ND |
864 | |
865 | for (i = 0; i < num_sched_list; ++i) { | |
866 | sched = sched_list[i]; | |
867 | ||
868 | if (!sched->ready) { | |
869 | DRM_WARN("scheduler %s is not ready, skipping", | |
870 | sched->name); | |
871 | continue; | |
872 | } | |
873 | ||
f2f12eb9 | 874 | num_score = atomic_read(sched->score); |
d41a39dd ND |
875 | if (num_score < min_score) { |
876 | min_score = num_score; | |
ec2edcc2 ND |
877 | picked_sched = sched; |
878 | } | |
879 | } | |
880 | ||
881 | return picked_sched; | |
882 | } | |
883 | EXPORT_SYMBOL(drm_sched_pick_best); | |
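A hedged example of using this for load balancing: pick the least-loaded ready copy of an engine before binding a new entity to it. The `my_dev` structure and its scheduler array are hypothetical.

```c
/* Hypothetical device state with several identical compute engines. */
struct my_dev {
	struct drm_gpu_scheduler *compute_scheds[4];
	unsigned int num_compute_scheds;
};

static int my_entity_init_balanced(struct my_dev *mdev,
				   struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *best;

	best = drm_sched_pick_best(mdev->compute_scheds,
				   mdev->num_compute_scheds);
	if (!best)
		return -ENODEV;

	/* Bind the entity to the single scheduler that was picked. */
	return drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_NORMAL,
				     &best, 1, NULL);
}
```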
884 | ||
2d33948e ND |
885 | /** |
886 | * drm_sched_blocked - check if the scheduler is blocked | |
887 | * | |
888 | * @sched: scheduler instance | |
889 | * | |
890 | * Returns true if blocked, otherwise false. | |
891 | */ | |
1b1f42d8 | 892 | static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) |
0875dc9e CZ |
893 | { |
894 | if (kthread_should_park()) { | |
895 | kthread_parkme(); | |
896 | return true; | |
897 | } | |
898 | ||
899 | return false; | |
900 | } | |
901 | ||
2d33948e ND |
902 | /** |
903 | * drm_sched_main - main scheduler thread | |
904 | * | |
905 | * @param: scheduler instance | |
906 | * | |
907 | * Returns 0. | |
908 | */ | |
1b1f42d8 | 909 | static int drm_sched_main(void *param) |
a72ce6f8 | 910 | { |
1b1f42d8 | 911 | struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param; |
83f4b118 | 912 | int r; |
a72ce6f8 | 913 | |
7b31e940 | 914 | sched_set_fifo_low(current); |
a72ce6f8 JZ |
915 | |
916 | while (!kthread_should_stop()) { | |
1b1f42d8 LS |
917 | struct drm_sched_entity *entity = NULL; |
918 | struct drm_sched_fence *s_fence; | |
919 | struct drm_sched_job *sched_job; | |
f54d1867 | 920 | struct dma_fence *fence; |
588b9828 | 921 | struct drm_sched_job *cleanup_job = NULL; |
6f0e54a9 | 922 | |
c2b6bd7e | 923 | wait_event_interruptible(sched->wake_up_worker, |
588b9828 | 924 | (cleanup_job = drm_sched_get_cleanup_job(sched)) || |
1b1f42d8 LS |
925 | (!drm_sched_blocked(sched) && |
926 | (entity = drm_sched_select_entity(sched))) || | |
588b9828 SP |
927 | kthread_should_stop()); |
928 | ||
bcf26654 | 929 | if (cleanup_job) |
588b9828 | 930 | sched->ops->free_job(cleanup_job); |
f85a6dd9 | 931 | |
3d651936 CK |
932 | if (!entity) |
933 | continue; | |
934 | ||
1b1f42d8 | 935 | sched_job = drm_sched_entity_pop_job(entity); |
83a7772b | 936 | |
3b5ac97a BB |
937 | if (!sched_job) { |
938 | complete(&entity->entity_idle); | |
f85a6dd9 | 939 | continue; |
3b5ac97a | 940 | } |
f85a6dd9 | 941 | |
4c7eb91c | 942 | s_fence = sched_job->s_fence; |
2440ff2c | 943 | |
b034b572 | 944 | atomic_inc(&sched->hw_rq_count); |
1b1f42d8 | 945 | drm_sched_job_begin(sched_job); |
7392c329 | 946 | |
c2c91828 | 947 | trace_drm_run_job(sched_job, entity); |
4c7eb91c | 948 | fence = sched->ops->run_job(sched_job); |
3b5ac97a | 949 | complete(&entity->entity_idle); |
1b1f42d8 | 950 | drm_sched_fence_scheduled(s_fence); |
29d25355 | 951 | |
e91e5f08 | 952 | if (!IS_ERR_OR_NULL(fence)) { |
f54d1867 | 953 | s_fence->parent = dma_fence_get(fence); |
3741540e | 954 | r = dma_fence_add_callback(fence, &sched_job->cb, |
71173e78 | 955 | drm_sched_job_done_cb); |
6f0e54a9 | 956 | if (r == -ENOENT) |
71173e78 | 957 | drm_sched_job_done(sched_job); |
6f0e54a9 | 958 | else if (r) |
16a7133f CK |
959 | DRM_ERROR("fence add callback failed (%d)\n", |
960 | r); | |
f54d1867 | 961 | dma_fence_put(fence); |
e91e5f08 | 962 | } else { |
d7c5782a AG |
963 | if (IS_ERR(fence)) |
964 | dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); | |
e91e5f08 | 965 | |
71173e78 | 966 | drm_sched_job_done(sched_job); |
e91e5f08 | 967 | } |
aef4852e | 968 | |
c2b6bd7e | 969 | wake_up(&sched->job_scheduled); |
a72ce6f8 JZ |
970 | } |
971 | return 0; | |
972 | } | |
973 | ||
a72ce6f8 | 974 | /** |
2d33948e | 975 | * drm_sched_init - Init a gpu scheduler instance |
a72ce6f8 | 976 | * |
2d33948e ND |
977 | * @sched: scheduler instance |
978 | * @ops: backend operations for this scheduler | |
979 | * @hw_submission: number of hw submissions that can be in flight | |
980 | * @hang_limit: number of times to allow a job to hang before dropping it | |
981 | * @timeout: timeout value in jiffies for the scheduler | |
78efe21b BB |
982 | * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is |
983 | * used | |
f2f12eb9 | 984 | * @score: optional score atomic shared with other schedulers |
2d33948e | 985 | * @name: name used for debugging |
a72ce6f8 | 986 | * |
4f839a24 | 987 | * Return 0 on success, otherwise error code. |
2d33948e | 988 | */ |
1b1f42d8 LS |
989 | int drm_sched_init(struct drm_gpu_scheduler *sched, |
990 | const struct drm_sched_backend_ops *ops, | |
78efe21b BB |
991 | unsigned hw_submission, unsigned hang_limit, |
992 | long timeout, struct workqueue_struct *timeout_wq, | |
f2f12eb9 | 993 | atomic_t *score, const char *name) |
a72ce6f8 | 994 | { |
9afd0756 | 995 | int i, ret; |
a72ce6f8 | 996 | sched->ops = ops; |
4cef9267 | 997 | sched->hw_submission_limit = hw_submission; |
4f839a24 | 998 | sched->name = name; |
2440ff2c | 999 | sched->timeout = timeout; |
78efe21b | 1000 | sched->timeout_wq = timeout_wq ? : system_wq; |
95aa9b1d | 1001 | sched->hang_limit = hang_limit; |
f2f12eb9 | 1002 | sched->score = score ? score : &sched->_score; |
e2d732fd | 1003 | for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++) |
8dc9fbbf | 1004 | drm_sched_rq_init(sched, &sched->sched_rq[i]); |
a72ce6f8 | 1005 | |
c2b6bd7e CK |
1006 | init_waitqueue_head(&sched->wake_up_worker); |
1007 | init_waitqueue_head(&sched->job_scheduled); | |
6efa4b46 | 1008 | INIT_LIST_HEAD(&sched->pending_list); |
4835096b | 1009 | spin_lock_init(&sched->job_list_lock); |
c746ba22 | 1010 | atomic_set(&sched->hw_rq_count, 0); |
6a962430 | 1011 | INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); |
f2f12eb9 | 1012 | atomic_set(&sched->_score, 0); |
93f8b367 | 1013 | atomic64_set(&sched->job_id_count, 0); |
4f839a24 | 1014 | |
a72ce6f8 | 1015 | /* Each scheduler will run on a separate kernel thread */ |
1b1f42d8 | 1016 | sched->thread = kthread_run(drm_sched_main, sched, sched->name); |
f4956598 | 1017 | if (IS_ERR(sched->thread)) { |
9afd0756 SM |
1018 | ret = PTR_ERR(sched->thread); |
1019 | sched->thread = NULL; | |
4f839a24 | 1020 | DRM_ERROR("Failed to create scheduler for %s.\n", name); |
9afd0756 | 1021 | return ret; |
a72ce6f8 JZ |
1022 | } |
1023 | ||
faf6e1a8 | 1024 | sched->ready = true; |
4f839a24 | 1025 | return 0; |
a72ce6f8 | 1026 | } |
1b1f42d8 | 1027 | EXPORT_SYMBOL(drm_sched_init); |
a72ce6f8 JZ |
1028 | |
1029 | /** | |
2d33948e ND |
1030 | * drm_sched_fini - Destroy a gpu scheduler |
1031 | * | |
1032 | * @sched: scheduler instance | |
a72ce6f8 | 1033 | * |
2d33948e | 1034 | * Tears down and cleans up the scheduler. |
a72ce6f8 | 1035 | */ |
1b1f42d8 | 1036 | void drm_sched_fini(struct drm_gpu_scheduler *sched) |
a72ce6f8 | 1037 | { |
c61cdbdb AG |
1038 | struct drm_sched_entity *s_entity; |
1039 | int i; | |
1040 | ||
32544d02 DA |
1041 | if (sched->thread) |
1042 | kthread_stop(sched->thread); | |
faf6e1a8 | 1043 | |
c61cdbdb AG |
1044 | for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { |
1045 | struct drm_sched_rq *rq = &sched->sched_rq[i]; | |
1046 | ||
1047 | if (!rq) | |
1048 | continue; | |
1049 | ||
1050 | spin_lock(&rq->lock); | |
1051 | list_for_each_entry(s_entity, &rq->entities, list) | |
1052 | /* | |
1053 | * Prevents reinsertion and marks job_queue as idle, | |
1054 | * it will be removed from the rq in drm_sched_entity_fini | 
1055 | * eventually | |
1056 | */ | |
1057 | s_entity->stopped = true; | |
1058 | spin_unlock(&rq->lock); | |
1059 | ||
1060 | } | |
1061 | ||
1062 | /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ | |
1063 | wake_up_all(&sched->job_scheduled); | |
1064 | ||
e582951b AG |
1065 | /* Confirm no work left behind accessing device structures */ |
1066 | cancel_delayed_work_sync(&sched->work_tdr); | |
1067 | ||
faf6e1a8 | 1068 | sched->ready = false; |
a72ce6f8 | 1069 | } |
1b1f42d8 | 1070 | EXPORT_SYMBOL(drm_sched_fini); |
e6c6338f JZ |
1071 | |
1072 | /** | |
1073 | * drm_sched_increase_karma_ext - Update sched_entity guilty flag | |
1074 | * | |
1075 | * @bad: The job guilty of time out | |
1076 | * @type: 1 to increase karma, 0 to reset it | 
1077 | * | |
1078 | */ | |
1079 | void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type) | |
1080 | { | |
1081 | int i; | |
1082 | struct drm_sched_entity *tmp; | |
1083 | struct drm_sched_entity *entity; | |
1084 | struct drm_gpu_scheduler *sched = bad->sched; | |
1085 | ||
1086 | /* don't change @bad's karma if it's from KERNEL RQ, | |
1087 | * because a GPU hang can sometimes corrupt kernel jobs (like VM updating jobs); | 
1088 | * keep in mind that kernel jobs are always considered good. | 
1089 | */ | |
1090 | if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { | |
1091 | if (type == 0) | |
1092 | atomic_set(&bad->karma, 0); | |
1093 | else if (type == 1) | |
1094 | atomic_inc(&bad->karma); | |
1095 | ||
1096 | for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; | |
1097 | i++) { | |
1098 | struct drm_sched_rq *rq = &sched->sched_rq[i]; | |
1099 | ||
1100 | spin_lock(&rq->lock); | |
1101 | list_for_each_entry_safe(entity, tmp, &rq->entities, list) { | |
1102 | if (bad->s_fence->scheduled.context == | |
1103 | entity->fence_context) { | |
1104 | if (entity->guilty) | |
1105 | atomic_set(entity->guilty, type); | |
1106 | break; | |
1107 | } | |
1108 | } | |
1109 | spin_unlock(&rq->lock); | |
1110 | if (&entity->list != &rq->entities) | |
1111 | break; | |
1112 | } | |
1113 | } | |
1114 | } | |
1115 | EXPORT_SYMBOL(drm_sched_increase_karma_ext); |