Commit | Line | Data |
---|---|---|
aab03e05 DF |
1 | /* |
2 | * Deadline Scheduling Class (SCHED_DEADLINE) | |
3 | * | |
4 | * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS). | |
5 | * | |
6 | * Tasks that periodically execute their instances for less than their | |
7 | * runtime won't miss any of their deadlines. | |
8 | * Tasks that are not periodic or sporadic or that try to execute more | |
9 | * than their reserved bandwidth will be slowed down (and may potentially | |
10 | * miss some of their deadlines), and won't affect any other task. | |
11 | * | |
12 | * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>, | |
1baca4ce | 13 | * Juri Lelli <juri.lelli@gmail.com>, |
aab03e05 DF |
14 | * Michael Trimarchi <michael@amarulasolutions.com>, |
15 | * Fabio Checconi <fchecconi@gmail.com> | |
16 | */ | |
17 | #include "sched.h" | |
18 | ||
19 | static inline int dl_time_before(u64 a, u64 b) | |
20 | { | |
21 | return (s64)(a - b) < 0; | |
22 | } | |
23 | ||
1baca4ce JL |
24 | /* |
25 | * Tells if entity @a should preempt entity @b. | |
26 | */ | |
27 | static inline | |
28 | int dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) | |
29 | { | |
30 | return dl_time_before(a->deadline, b->deadline); | |
31 | } | |
32 | ||
aab03e05 DF |
33 | static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) |
34 | { | |
35 | return container_of(dl_se, struct task_struct, dl); | |
36 | } | |
37 | ||
38 | static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq) | |
39 | { | |
40 | return container_of(dl_rq, struct rq, dl); | |
41 | } | |
42 | ||
43 | static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se) | |
44 | { | |
45 | struct task_struct *p = dl_task_of(dl_se); | |
46 | struct rq *rq = task_rq(p); | |
47 | ||
48 | return &rq->dl; | |
49 | } | |
50 | ||
51 | static inline int on_dl_rq(struct sched_dl_entity *dl_se) | |
52 | { | |
53 | return !RB_EMPTY_NODE(&dl_se->rb_node); | |
54 | } | |
55 | ||
56 | static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) | |
57 | { | |
58 | struct sched_dl_entity *dl_se = &p->dl; | |
59 | ||
60 | return dl_rq->rb_leftmost == &dl_se->rb_node; | |
61 | } | |
62 | ||
63 | void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) | |
64 | { | |
65 | dl_rq->rb_root = RB_ROOT; | |
1baca4ce JL |
66 | |
67 | #ifdef CONFIG_SMP | |
68 | /* zero means no -deadline tasks */ | |
69 | dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; | |
70 | ||
71 | dl_rq->dl_nr_migratory = 0; | |
72 | dl_rq->overloaded = 0; | |
73 | dl_rq->pushable_dl_tasks_root = RB_ROOT; | |
74 | #endif | |
75 | } | |
76 | ||
77 | #ifdef CONFIG_SMP | |
78 | ||
79 | static inline int dl_overloaded(struct rq *rq) | |
80 | { | |
81 | return atomic_read(&rq->rd->dlo_count); | |
82 | } | |
83 | ||
84 | static inline void dl_set_overload(struct rq *rq) | |
85 | { | |
86 | if (!rq->online) | |
87 | return; | |
88 | ||
89 | cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); | |
90 | /* | |
91 | * Must be visible before the overload count is | |
92 | * set (as in sched_rt.c). | |
93 | * | |
94 | * Matched by the barrier in pull_dl_task(). | |
95 | */ | |
96 | smp_wmb(); | |
97 | atomic_inc(&rq->rd->dlo_count); | |
98 | } | |
99 | ||
100 | static inline void dl_clear_overload(struct rq *rq) | |
101 | { | |
102 | if (!rq->online) | |
103 | return; | |
104 | ||
105 | atomic_dec(&rq->rd->dlo_count); | |
106 | cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); | |
107 | } | |
108 | ||
109 | static void update_dl_migration(struct dl_rq *dl_rq) | |
110 | { | |
111 | if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) { | |
112 | if (!dl_rq->overloaded) { | |
113 | dl_set_overload(rq_of_dl_rq(dl_rq)); | |
114 | dl_rq->overloaded = 1; | |
115 | } | |
116 | } else if (dl_rq->overloaded) { | |
117 | dl_clear_overload(rq_of_dl_rq(dl_rq)); | |
118 | dl_rq->overloaded = 0; | |
119 | } | |
120 | } | |
121 | ||
122 | static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
123 | { | |
124 | struct task_struct *p = dl_task_of(dl_se); | |
125 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | |
126 | ||
127 | dl_rq->dl_nr_total++; | |
128 | if (p->nr_cpus_allowed > 1) | |
129 | dl_rq->dl_nr_migratory++; | |
130 | ||
131 | update_dl_migration(dl_rq); | |
132 | } | |
133 | ||
134 | static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
135 | { | |
136 | struct task_struct *p = dl_task_of(dl_se); | |
137 | dl_rq = &rq_of_dl_rq(dl_rq)->dl; | |
138 | ||
139 | dl_rq->dl_nr_total--; | |
140 | if (p->nr_cpus_allowed > 1) | |
141 | dl_rq->dl_nr_migratory--; | |
142 | ||
143 | update_dl_migration(dl_rq); | |
144 | } | |
145 | ||
146 | /* | |
147 | * The list of pushable -deadline task is not a plist, like in | |
148 | * sched_rt.c, it is an rb-tree with tasks ordered by deadline. | |
149 | */ | |
150 | static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) | |
151 | { | |
152 | struct dl_rq *dl_rq = &rq->dl; | |
153 | struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; | |
154 | struct rb_node *parent = NULL; | |
155 | struct task_struct *entry; | |
156 | int leftmost = 1; | |
157 | ||
158 | BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); | |
159 | ||
160 | while (*link) { | |
161 | parent = *link; | |
162 | entry = rb_entry(parent, struct task_struct, | |
163 | pushable_dl_tasks); | |
164 | if (dl_entity_preempt(&p->dl, &entry->dl)) | |
165 | link = &parent->rb_left; | |
166 | else { | |
167 | link = &parent->rb_right; | |
168 | leftmost = 0; | |
169 | } | |
170 | } | |
171 | ||
172 | if (leftmost) | |
173 | dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; | |
174 | ||
175 | rb_link_node(&p->pushable_dl_tasks, parent, link); | |
176 | rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | |
aab03e05 DF |
177 | } |
178 | ||
1baca4ce JL |
179 | static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) |
180 | { | |
181 | struct dl_rq *dl_rq = &rq->dl; | |
182 | ||
183 | if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) | |
184 | return; | |
185 | ||
186 | if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { | |
187 | struct rb_node *next_node; | |
188 | ||
189 | next_node = rb_next(&p->pushable_dl_tasks); | |
190 | dl_rq->pushable_dl_tasks_leftmost = next_node; | |
191 | } | |
192 | ||
193 | rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); | |
194 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
195 | } | |
196 | ||
197 | static inline int has_pushable_dl_tasks(struct rq *rq) | |
198 | { | |
199 | return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); | |
200 | } | |
201 | ||
202 | static int push_dl_task(struct rq *rq); | |
203 | ||
204 | #else | |
205 | ||
/* Without CONFIG_SMP this is a no-op. */
static inline void
enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}
210 | ||
/* Without CONFIG_SMP this is a no-op. */
static inline void
dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}
215 | ||
/* Without CONFIG_SMP this is a no-op. */
static inline void
inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
220 | ||
/* Without CONFIG_SMP this is a no-op. */
static inline void
dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
225 | ||
226 | #endif /* CONFIG_SMP */ | |
227 | ||
aab03e05 DF |
228 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); |
229 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); | |
230 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | |
231 | int flags); | |
232 | ||
233 | /* | |
234 | * We are being explicitly informed that a new instance is starting, | |
235 | * and this means that: | |
236 | * - the absolute deadline of the entity has to be placed at | |
237 | * current time + relative deadline; | |
238 | * - the runtime of the entity has to be set to the maximum value. | |
239 | * | |
240 | * The capability of specifying such event is useful whenever a -deadline | |
241 | * entity wants to (try to!) synchronize its behaviour with the scheduler's | |
242 | * one, and to (try to!) reconcile itself with its own scheduling | |
243 | * parameters. | |
244 | */ | |
245 | static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) | |
246 | { | |
247 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
248 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
249 | ||
250 | WARN_ON(!dl_se->dl_new || dl_se->dl_throttled); | |
251 | ||
252 | /* | |
253 | * We use the regular wall clock time to set deadlines in the | |
254 | * future; in fact, we must consider execution overheads (time | |
255 | * spent on hardirq context, etc.). | |
256 | */ | |
257 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
258 | dl_se->runtime = dl_se->dl_runtime; | |
259 | dl_se->dl_new = 0; | |
260 | } | |
261 | ||
262 | /* | |
263 | * Pure Earliest Deadline First (EDF) scheduling does not deal with the | |
264 | * possibility of a entity lasting more than what it declared, and thus | |
265 | * exhausting its runtime. | |
266 | * | |
267 | * Here we are interested in making runtime overrun possible, but we do | |
268 | * not want a entity which is misbehaving to affect the scheduling of all | |
269 | * other entities. | |
270 | * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) | |
271 | * is used, in order to confine each entity within its own bandwidth. | |
272 | * | |
273 | * This function deals exactly with that, and ensures that when the runtime | |
274 | * of a entity is replenished, its deadline is also postponed. That ensures | |
275 | * the overrunning entity can't interfere with other entity in the system and | |
276 | * can't make them miss their deadlines. Reasons why this kind of overruns | |
277 | * could happen are, typically, a entity voluntarily trying to overcome its | |
278 | * runtime, or it just underestimated it during sched_setscheduler_ex(). | |
279 | */ | |
280 | static void replenish_dl_entity(struct sched_dl_entity *dl_se) | |
281 | { | |
282 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
283 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
284 | ||
285 | /* | |
286 | * We keep moving the deadline away until we get some | |
287 | * available runtime for the entity. This ensures correct | |
288 | * handling of situations where the runtime overrun is | |
289 | * arbitrary large. | |
290 | */ | |
291 | while (dl_se->runtime <= 0) { | |
292 | dl_se->deadline += dl_se->dl_deadline; | |
293 | dl_se->runtime += dl_se->dl_runtime; | |
294 | } | |
295 | ||
296 | /* | |
297 | * At this point, the deadline really should be "in | |
298 | * the future" with respect to rq->clock. If it's | |
299 | * not, we are, for some reason, lagging too much! | |
300 | * Anyway, after having warn userspace abut that, | |
301 | * we still try to keep the things running by | |
302 | * resetting the deadline and the budget of the | |
303 | * entity. | |
304 | */ | |
305 | if (dl_time_before(dl_se->deadline, rq_clock(rq))) { | |
306 | static bool lag_once = false; | |
307 | ||
308 | if (!lag_once) { | |
309 | lag_once = true; | |
310 | printk_sched("sched: DL replenish lagged to much\n"); | |
311 | } | |
312 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
313 | dl_se->runtime = dl_se->dl_runtime; | |
314 | } | |
315 | } | |
316 | ||
317 | /* | |
318 | * Here we check if --at time t-- an entity (which is probably being | |
319 | * [re]activated or, in general, enqueued) can use its remaining runtime | |
320 | * and its current deadline _without_ exceeding the bandwidth it is | |
321 | * assigned (function returns true if it can't). We are in fact applying | |
322 | * one of the CBS rules: when a task wakes up, if the residual runtime | |
323 | * over residual deadline fits within the allocated bandwidth, then we | |
324 | * can keep the current (absolute) deadline and residual budget without | |
325 | * disrupting the schedulability of the system. Otherwise, we should | |
326 | * refill the runtime and set the deadline a period in the future, | |
327 | * because keeping the current (absolute) deadline of the task would | |
328 | * result in breaking guarantees promised to other tasks. | |
329 | * | |
330 | * This function returns true if: | |
331 | * | |
332 | * runtime / (deadline - t) > dl_runtime / dl_deadline , | |
333 | * | |
334 | * IOW we can't recycle current parameters. | |
335 | */ | |
336 | static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t) | |
337 | { | |
338 | u64 left, right; | |
339 | ||
340 | /* | |
341 | * left and right are the two sides of the equation above, | |
342 | * after a bit of shuffling to use multiplications instead | |
343 | * of divisions. | |
344 | * | |
345 | * Note that none of the time values involved in the two | |
346 | * multiplications are absolute: dl_deadline and dl_runtime | |
347 | * are the relative deadline and the maximum runtime of each | |
348 | * instance, runtime is the runtime left for the last instance | |
349 | * and (deadline - t), since t is rq->clock, is the time left | |
350 | * to the (absolute) deadline. Even if overflowing the u64 type | |
351 | * is very unlikely to occur in both cases, here we scale down | |
352 | * as we want to avoid that risk at all. Scaling down by 10 | |
353 | * means that we reduce granularity to 1us. We are fine with it, | |
354 | * since this is only a true/false check and, anyway, thinking | |
355 | * of anything below microseconds resolution is actually fiction | |
356 | * (but still we want to give the user that illusion >;). | |
357 | */ | |
358 | left = (dl_se->dl_deadline >> 10) * (dl_se->runtime >> 10); | |
359 | right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10); | |
360 | ||
361 | return dl_time_before(right, left); | |
362 | } | |
363 | ||
364 | /* | |
365 | * When a -deadline entity is queued back on the runqueue, its runtime and | |
366 | * deadline might need updating. | |
367 | * | |
368 | * The policy here is that we update the deadline of the entity only if: | |
369 | * - the current deadline is in the past, | |
370 | * - using the remaining runtime with the current deadline would make | |
371 | * the entity exceed its bandwidth. | |
372 | */ | |
373 | static void update_dl_entity(struct sched_dl_entity *dl_se) | |
374 | { | |
375 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
376 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
377 | ||
378 | /* | |
379 | * The arrival of a new instance needs special treatment, i.e., | |
380 | * the actual scheduling parameters have to be "renewed". | |
381 | */ | |
382 | if (dl_se->dl_new) { | |
383 | setup_new_dl_entity(dl_se); | |
384 | return; | |
385 | } | |
386 | ||
387 | if (dl_time_before(dl_se->deadline, rq_clock(rq)) || | |
388 | dl_entity_overflow(dl_se, rq_clock(rq))) { | |
389 | dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; | |
390 | dl_se->runtime = dl_se->dl_runtime; | |
391 | } | |
392 | } | |
393 | ||
394 | /* | |
395 | * If the entity depleted all its runtime, and if we want it to sleep | |
396 | * while waiting for some new execution time to become available, we | |
397 | * set the bandwidth enforcement timer to the replenishment instant | |
398 | * and try to activate it. | |
399 | * | |
400 | * Notice that it is important for the caller to know if the timer | |
401 | * actually started or not (i.e., the replenishment instant is in | |
402 | * the future or in the past). | |
403 | */ | |
404 | static int start_dl_timer(struct sched_dl_entity *dl_se) | |
405 | { | |
406 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
407 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
408 | ktime_t now, act; | |
409 | ktime_t soft, hard; | |
410 | unsigned long range; | |
411 | s64 delta; | |
412 | ||
413 | /* | |
414 | * We want the timer to fire at the deadline, but considering | |
415 | * that it is actually coming from rq->clock and not from | |
416 | * hrtimer's time base reading. | |
417 | */ | |
418 | act = ns_to_ktime(dl_se->deadline); | |
419 | now = hrtimer_cb_get_time(&dl_se->dl_timer); | |
420 | delta = ktime_to_ns(now) - rq_clock(rq); | |
421 | act = ktime_add_ns(act, delta); | |
422 | ||
423 | /* | |
424 | * If the expiry time already passed, e.g., because the value | |
425 | * chosen as the deadline is too small, don't even try to | |
426 | * start the timer in the past! | |
427 | */ | |
428 | if (ktime_us_delta(act, now) < 0) | |
429 | return 0; | |
430 | ||
431 | hrtimer_set_expires(&dl_se->dl_timer, act); | |
432 | ||
433 | soft = hrtimer_get_softexpires(&dl_se->dl_timer); | |
434 | hard = hrtimer_get_expires(&dl_se->dl_timer); | |
435 | range = ktime_to_ns(ktime_sub(hard, soft)); | |
436 | __hrtimer_start_range_ns(&dl_se->dl_timer, soft, | |
437 | range, HRTIMER_MODE_ABS, 0); | |
438 | ||
439 | return hrtimer_active(&dl_se->dl_timer); | |
440 | } | |
441 | ||
442 | /* | |
443 | * This is the bandwidth enforcement timer callback. If here, we know | |
444 | * a task is not on its dl_rq, since the fact that the timer was running | |
445 | * means the task is throttled and needs a runtime replenishment. | |
446 | * | |
447 | * However, what we actually do depends on the fact the task is active, | |
448 | * (it is on its rq) or has been removed from there by a call to | |
449 | * dequeue_task_dl(). In the former case we must issue the runtime | |
450 | * replenishment and add the task back to the dl_rq; in the latter, we just | |
451 | * do nothing but clearing dl_throttled, so that runtime and deadline | |
452 | * updating (and the queueing back to dl_rq) will be done by the | |
453 | * next call to enqueue_task_dl(). | |
454 | */ | |
455 | static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) | |
456 | { | |
457 | struct sched_dl_entity *dl_se = container_of(timer, | |
458 | struct sched_dl_entity, | |
459 | dl_timer); | |
460 | struct task_struct *p = dl_task_of(dl_se); | |
461 | struct rq *rq = task_rq(p); | |
462 | raw_spin_lock(&rq->lock); | |
463 | ||
464 | /* | |
465 | * We need to take care of a possible races here. In fact, the | |
466 | * task might have changed its scheduling policy to something | |
467 | * different from SCHED_DEADLINE or changed its reservation | |
468 | * parameters (through sched_setscheduler()). | |
469 | */ | |
470 | if (!dl_task(p) || dl_se->dl_new) | |
471 | goto unlock; | |
472 | ||
473 | sched_clock_tick(); | |
474 | update_rq_clock(rq); | |
475 | dl_se->dl_throttled = 0; | |
476 | if (p->on_rq) { | |
477 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | |
478 | if (task_has_dl_policy(rq->curr)) | |
479 | check_preempt_curr_dl(rq, p, 0); | |
480 | else | |
481 | resched_task(rq->curr); | |
1baca4ce JL |
482 | #ifdef CONFIG_SMP |
483 | /* | |
484 | * Queueing this task back might have overloaded rq, | |
485 | * check if we need to kick someone away. | |
486 | */ | |
487 | if (has_pushable_dl_tasks(rq)) | |
488 | push_dl_task(rq); | |
489 | #endif | |
aab03e05 DF |
490 | } |
491 | unlock: | |
492 | raw_spin_unlock(&rq->lock); | |
493 | ||
494 | return HRTIMER_NORESTART; | |
495 | } | |
496 | ||
497 | void init_dl_task_timer(struct sched_dl_entity *dl_se) | |
498 | { | |
499 | struct hrtimer *timer = &dl_se->dl_timer; | |
500 | ||
501 | if (hrtimer_active(timer)) { | |
502 | hrtimer_try_to_cancel(timer); | |
503 | return; | |
504 | } | |
505 | ||
506 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
507 | timer->function = dl_task_timer; | |
508 | } | |
509 | ||
510 | static | |
511 | int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) | |
512 | { | |
513 | int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); | |
514 | int rorun = dl_se->runtime <= 0; | |
515 | ||
516 | if (!rorun && !dmiss) | |
517 | return 0; | |
518 | ||
519 | /* | |
520 | * If we are beyond our current deadline and we are still | |
521 | * executing, then we have already used some of the runtime of | |
522 | * the next instance. Thus, if we do not account that, we are | |
523 | * stealing bandwidth from the system at each deadline miss! | |
524 | */ | |
525 | if (dmiss) { | |
526 | dl_se->runtime = rorun ? dl_se->runtime : 0; | |
527 | dl_se->runtime -= rq_clock(rq) - dl_se->deadline; | |
528 | } | |
529 | ||
530 | return 1; | |
531 | } | |
532 | ||
533 | /* | |
534 | * Update the current task's runtime statistics (provided it is still | |
535 | * a -deadline task and has not been removed from the dl_rq). | |
536 | */ | |
537 | static void update_curr_dl(struct rq *rq) | |
538 | { | |
539 | struct task_struct *curr = rq->curr; | |
540 | struct sched_dl_entity *dl_se = &curr->dl; | |
541 | u64 delta_exec; | |
542 | ||
543 | if (!dl_task(curr) || !on_dl_rq(dl_se)) | |
544 | return; | |
545 | ||
546 | /* | |
547 | * Consumed budget is computed considering the time as | |
548 | * observed by schedulable tasks (excluding time spent | |
549 | * in hardirq context, etc.). Deadlines are instead | |
550 | * computed using hard walltime. This seems to be the more | |
551 | * natural solution, but the full ramifications of this | |
552 | * approach need further study. | |
553 | */ | |
554 | delta_exec = rq_clock_task(rq) - curr->se.exec_start; | |
555 | if (unlikely((s64)delta_exec < 0)) | |
556 | delta_exec = 0; | |
557 | ||
558 | schedstat_set(curr->se.statistics.exec_max, | |
559 | max(curr->se.statistics.exec_max, delta_exec)); | |
560 | ||
561 | curr->se.sum_exec_runtime += delta_exec; | |
562 | account_group_exec_runtime(curr, delta_exec); | |
563 | ||
564 | curr->se.exec_start = rq_clock_task(rq); | |
565 | cpuacct_charge(curr, delta_exec); | |
566 | ||
567 | dl_se->runtime -= delta_exec; | |
568 | if (dl_runtime_exceeded(rq, dl_se)) { | |
569 | __dequeue_task_dl(rq, curr, 0); | |
570 | if (likely(start_dl_timer(dl_se))) | |
571 | dl_se->dl_throttled = 1; | |
572 | else | |
573 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | |
574 | ||
575 | if (!is_leftmost(curr, &rq->dl)) | |
576 | resched_task(curr); | |
577 | } | |
578 | } | |
579 | ||
1baca4ce JL |
580 | #ifdef CONFIG_SMP |
581 | ||
582 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu); | |
583 | ||
584 | static inline u64 next_deadline(struct rq *rq) | |
585 | { | |
586 | struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu); | |
587 | ||
588 | if (next && dl_prio(next->prio)) | |
589 | return next->dl.deadline; | |
590 | else | |
591 | return 0; | |
592 | } | |
593 | ||
594 | static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | |
595 | { | |
596 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
597 | ||
598 | if (dl_rq->earliest_dl.curr == 0 || | |
599 | dl_time_before(deadline, dl_rq->earliest_dl.curr)) { | |
600 | /* | |
601 | * If the dl_rq had no -deadline tasks, or if the new task | |
602 | * has shorter deadline than the current one on dl_rq, we | |
603 | * know that the previous earliest becomes our next earliest, | |
604 | * as the new task becomes the earliest itself. | |
605 | */ | |
606 | dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr; | |
607 | dl_rq->earliest_dl.curr = deadline; | |
608 | } else if (dl_rq->earliest_dl.next == 0 || | |
609 | dl_time_before(deadline, dl_rq->earliest_dl.next)) { | |
610 | /* | |
611 | * On the other hand, if the new -deadline task has a | |
612 | * a later deadline than the earliest one on dl_rq, but | |
613 | * it is earlier than the next (if any), we must | |
614 | * recompute the next-earliest. | |
615 | */ | |
616 | dl_rq->earliest_dl.next = next_deadline(rq); | |
617 | } | |
618 | } | |
619 | ||
620 | static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) | |
621 | { | |
622 | struct rq *rq = rq_of_dl_rq(dl_rq); | |
623 | ||
624 | /* | |
625 | * Since we may have removed our earliest (and/or next earliest) | |
626 | * task we must recompute them. | |
627 | */ | |
628 | if (!dl_rq->dl_nr_running) { | |
629 | dl_rq->earliest_dl.curr = 0; | |
630 | dl_rq->earliest_dl.next = 0; | |
631 | } else { | |
632 | struct rb_node *leftmost = dl_rq->rb_leftmost; | |
633 | struct sched_dl_entity *entry; | |
634 | ||
635 | entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); | |
636 | dl_rq->earliest_dl.curr = entry->deadline; | |
637 | dl_rq->earliest_dl.next = next_deadline(rq); | |
638 | } | |
639 | } | |
640 | ||
641 | #else | |
642 | ||
643 | static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | |
644 | static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} | |
645 | ||
646 | #endif /* CONFIG_SMP */ | |
647 | ||
648 | static inline | |
649 | void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
650 | { | |
651 | int prio = dl_task_of(dl_se)->prio; | |
652 | u64 deadline = dl_se->deadline; | |
653 | ||
654 | WARN_ON(!dl_prio(prio)); | |
655 | dl_rq->dl_nr_running++; | |
656 | ||
657 | inc_dl_deadline(dl_rq, deadline); | |
658 | inc_dl_migration(dl_se, dl_rq); | |
659 | } | |
660 | ||
661 | static inline | |
662 | void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
663 | { | |
664 | int prio = dl_task_of(dl_se)->prio; | |
665 | ||
666 | WARN_ON(!dl_prio(prio)); | |
667 | WARN_ON(!dl_rq->dl_nr_running); | |
668 | dl_rq->dl_nr_running--; | |
669 | ||
670 | dec_dl_deadline(dl_rq, dl_se->deadline); | |
671 | dec_dl_migration(dl_se, dl_rq); | |
672 | } | |
673 | ||
aab03e05 DF |
674 | static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) |
675 | { | |
676 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
677 | struct rb_node **link = &dl_rq->rb_root.rb_node; | |
678 | struct rb_node *parent = NULL; | |
679 | struct sched_dl_entity *entry; | |
680 | int leftmost = 1; | |
681 | ||
682 | BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); | |
683 | ||
684 | while (*link) { | |
685 | parent = *link; | |
686 | entry = rb_entry(parent, struct sched_dl_entity, rb_node); | |
687 | if (dl_time_before(dl_se->deadline, entry->deadline)) | |
688 | link = &parent->rb_left; | |
689 | else { | |
690 | link = &parent->rb_right; | |
691 | leftmost = 0; | |
692 | } | |
693 | } | |
694 | ||
695 | if (leftmost) | |
696 | dl_rq->rb_leftmost = &dl_se->rb_node; | |
697 | ||
698 | rb_link_node(&dl_se->rb_node, parent, link); | |
699 | rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); | |
700 | ||
1baca4ce | 701 | inc_dl_tasks(dl_se, dl_rq); |
aab03e05 DF |
702 | } |
703 | ||
704 | static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) | |
705 | { | |
706 | struct dl_rq *dl_rq = dl_rq_of_se(dl_se); | |
707 | ||
708 | if (RB_EMPTY_NODE(&dl_se->rb_node)) | |
709 | return; | |
710 | ||
711 | if (dl_rq->rb_leftmost == &dl_se->rb_node) { | |
712 | struct rb_node *next_node; | |
713 | ||
714 | next_node = rb_next(&dl_se->rb_node); | |
715 | dl_rq->rb_leftmost = next_node; | |
716 | } | |
717 | ||
718 | rb_erase(&dl_se->rb_node, &dl_rq->rb_root); | |
719 | RB_CLEAR_NODE(&dl_se->rb_node); | |
720 | ||
1baca4ce | 721 | dec_dl_tasks(dl_se, dl_rq); |
aab03e05 DF |
722 | } |
723 | ||
724 | static void | |
725 | enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) | |
726 | { | |
727 | BUG_ON(on_dl_rq(dl_se)); | |
728 | ||
729 | /* | |
730 | * If this is a wakeup or a new instance, the scheduling | |
731 | * parameters of the task might need updating. Otherwise, | |
732 | * we want a replenishment of its runtime. | |
733 | */ | |
734 | if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) | |
735 | replenish_dl_entity(dl_se); | |
736 | else | |
737 | update_dl_entity(dl_se); | |
738 | ||
739 | __enqueue_dl_entity(dl_se); | |
740 | } | |
741 | ||
/* Remove @dl_se from its dl_rq; thin wrapper around __dequeue_dl_entity(). */
static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
	__dequeue_dl_entity(dl_se);
}
746 | ||
747 | static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
748 | { | |
749 | /* | |
750 | * If p is throttled, we do nothing. In fact, if it exhausted | |
751 | * its budget it needs a replenishment and, since it now is on | |
752 | * its rq, the bandwidth timer callback (which clearly has not | |
753 | * run yet) will take care of this. | |
754 | */ | |
755 | if (p->dl.dl_throttled) | |
756 | return; | |
757 | ||
758 | enqueue_dl_entity(&p->dl, flags); | |
1baca4ce JL |
759 | |
760 | if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | |
761 | enqueue_pushable_dl_task(rq, p); | |
762 | ||
aab03e05 DF |
763 | inc_nr_running(rq); |
764 | } | |
765 | ||
766 | static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
767 | { | |
768 | dequeue_dl_entity(&p->dl); | |
1baca4ce | 769 | dequeue_pushable_dl_task(rq, p); |
aab03e05 DF |
770 | } |
771 | ||
/*
 * Dequeue @p from @rq: first bring the running task's accounting up to
 * date, then remove @p from the -deadline structures and drop rq's
 * nr_running.
 */
static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
	update_curr_dl(rq);

	__dequeue_task_dl(rq, p, flags);
	dec_nr_running(rq);
}
779 | ||
780 | /* | |
781 | * Yield task semantic for -deadline tasks is: | |
782 | * | |
783 | * get off from the CPU until our next instance, with | |
784 | * a new runtime. This is of little use now, since we | |
785 | * don't have a bandwidth reclaiming mechanism. Anyway, | |
786 | * bandwidth reclaiming is planned for the future, and | |
787 | * yield_task_dl will indicate that some spare budget | |
788 | * is available for other task instances to use it. | |
789 | */ | |
790 | static void yield_task_dl(struct rq *rq) | |
791 | { | |
792 | struct task_struct *p = rq->curr; | |
793 | ||
794 | /* | |
795 | * We make the task go to sleep until its current deadline by | |
796 | * forcing its runtime to zero. This way, update_curr_dl() stops | |
797 | * it and the bandwidth timer will wake it up and will give it | |
798 | * new scheduling parameters (thanks to dl_new=1). | |
799 | */ | |
800 | if (p->dl.runtime > 0) { | |
801 | rq->curr->dl.dl_new = 1; | |
802 | p->dl.runtime = 0; | |
803 | } | |
804 | update_curr_dl(rq); | |
805 | } | |
806 | ||
1baca4ce JL |
807 | #ifdef CONFIG_SMP |
808 | ||
809 | static int find_later_rq(struct task_struct *task); | |
810 | static int latest_cpu_find(struct cpumask *span, | |
811 | struct task_struct *task, | |
812 | struct cpumask *later_mask); | |
813 | ||
814 | static int | |
815 | select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |
816 | { | |
817 | struct task_struct *curr; | |
818 | struct rq *rq; | |
819 | ||
820 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | |
821 | goto out; | |
822 | ||
823 | rq = cpu_rq(cpu); | |
824 | ||
825 | rcu_read_lock(); | |
826 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | |
827 | ||
828 | /* | |
829 | * If we are dealing with a -deadline task, we must | |
830 | * decide where to wake it up. | |
831 | * If it has a later deadline and the current task | |
832 | * on this rq can't move (provided the waking task | |
833 | * can!) we prefer to send it somewhere else. On the | |
834 | * other hand, if it has a shorter deadline, we | |
835 | * try to make it stay here, it might be important. | |
836 | */ | |
837 | if (unlikely(dl_task(curr)) && | |
838 | (curr->nr_cpus_allowed < 2 || | |
839 | !dl_entity_preempt(&p->dl, &curr->dl)) && | |
840 | (p->nr_cpus_allowed > 1)) { | |
841 | int target = find_later_rq(p); | |
842 | ||
843 | if (target != -1) | |
844 | cpu = target; | |
845 | } | |
846 | rcu_read_unlock(); | |
847 | ||
848 | out: | |
849 | return cpu; | |
850 | } | |
851 | ||
852 | static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |
853 | { | |
854 | /* | |
855 | * Current can't be migrated, useless to reschedule, | |
856 | * let's hope p can move out. | |
857 | */ | |
858 | if (rq->curr->nr_cpus_allowed == 1 || | |
859 | latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1) | |
860 | return; | |
861 | ||
862 | /* | |
863 | * p is migratable, so let's not schedule it and | |
864 | * see if it is pushed or pulled somewhere else. | |
865 | */ | |
866 | if (p->nr_cpus_allowed != 1 && | |
867 | latest_cpu_find(rq->rd->span, p, NULL) != -1) | |
868 | return; | |
869 | ||
870 | resched_task(rq->curr); | |
871 | } | |
872 | ||
873 | #endif /* CONFIG_SMP */ | |
874 | ||
aab03e05 DF |
875 | /* |
876 | * Only called when both the current and waking task are -deadline | |
877 | * tasks. | |
878 | */ | |
879 | static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | |
880 | int flags) | |
881 | { | |
1baca4ce | 882 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { |
aab03e05 | 883 | resched_task(rq->curr); |
1baca4ce JL |
884 | return; |
885 | } | |
886 | ||
887 | #ifdef CONFIG_SMP | |
888 | /* | |
889 | * In the unlikely case current and p have the same deadline | |
890 | * let us try to decide what's the best thing to do... | |
891 | */ | |
892 | if ((s64)(p->dl.deadline - rq->curr->dl.deadline) == 0 && | |
893 | !need_resched()) | |
894 | check_preempt_equal_dl(rq, p); | |
895 | #endif /* CONFIG_SMP */ | |
aab03e05 DF |
896 | } |
897 | ||
898 | #ifdef CONFIG_SCHED_HRTICK | |
899 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | |
900 | { | |
901 | s64 delta = p->dl.dl_runtime - p->dl.runtime; | |
902 | ||
903 | if (delta > 10000) | |
904 | hrtick_start(rq, p->dl.runtime); | |
905 | } | |
906 | #endif | |
907 | ||
908 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | |
909 | struct dl_rq *dl_rq) | |
910 | { | |
911 | struct rb_node *left = dl_rq->rb_leftmost; | |
912 | ||
913 | if (!left) | |
914 | return NULL; | |
915 | ||
916 | return rb_entry(left, struct sched_dl_entity, rb_node); | |
917 | } | |
918 | ||
919 | struct task_struct *pick_next_task_dl(struct rq *rq) | |
920 | { | |
921 | struct sched_dl_entity *dl_se; | |
922 | struct task_struct *p; | |
923 | struct dl_rq *dl_rq; | |
924 | ||
925 | dl_rq = &rq->dl; | |
926 | ||
927 | if (unlikely(!dl_rq->dl_nr_running)) | |
928 | return NULL; | |
929 | ||
930 | dl_se = pick_next_dl_entity(rq, dl_rq); | |
931 | BUG_ON(!dl_se); | |
932 | ||
933 | p = dl_task_of(dl_se); | |
934 | p->se.exec_start = rq_clock_task(rq); | |
1baca4ce JL |
935 | |
936 | /* Running task will never be pushed. */ | |
937 | if (p) | |
938 | dequeue_pushable_dl_task(rq, p); | |
939 | ||
aab03e05 DF |
940 | #ifdef CONFIG_SCHED_HRTICK |
941 | if (hrtick_enabled(rq)) | |
942 | start_hrtick_dl(rq, p); | |
943 | #endif | |
1baca4ce JL |
944 | |
945 | #ifdef CONFIG_SMP | |
946 | rq->post_schedule = has_pushable_dl_tasks(rq); | |
947 | #endif /* CONFIG_SMP */ | |
948 | ||
aab03e05 DF |
949 | return p; |
950 | } | |
951 | ||
952 | static void put_prev_task_dl(struct rq *rq, struct task_struct *p) | |
953 | { | |
954 | update_curr_dl(rq); | |
1baca4ce JL |
955 | |
956 | if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) | |
957 | enqueue_pushable_dl_task(rq, p); | |
aab03e05 DF |
958 | } |
959 | ||
/*
 * Tick handler: update the accounting of the running task and, with
 * CONFIG_SCHED_HRTICK, re-arm the hrtick when it is enabled, @queued
 * is set and @p still has some runtime left.
 */
static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
{
	update_curr_dl(rq);

#ifdef CONFIG_SCHED_HRTICK
	if (!hrtick_enabled(rq) || !queued || p->dl.runtime <= 0)
		return;

	start_hrtick_dl(rq, p);
#endif
}
969 | ||
/*
 * Nothing to do at fork time: SCHED_DEADLINE tasks cannot fork, and
 * that is enforced through sched_fork().
 */
static void task_fork_dl(struct task_struct *p)
{
}
977 | ||
978 | static void task_dead_dl(struct task_struct *p) | |
979 | { | |
980 | struct hrtimer *timer = &p->dl.dl_timer; | |
981 | ||
982 | if (hrtimer_active(timer)) | |
983 | hrtimer_try_to_cancel(timer); | |
984 | } | |
985 | ||
986 | static void set_curr_task_dl(struct rq *rq) | |
987 | { | |
988 | struct task_struct *p = rq->curr; | |
989 | ||
990 | p->se.exec_start = rq_clock_task(rq); | |
1baca4ce JL |
991 | |
992 | /* You can't push away the running task */ | |
993 | dequeue_pushable_dl_task(rq, p); | |
994 | } | |
995 | ||
996 | #ifdef CONFIG_SMP | |
997 | ||
998 | /* Only try algorithms three times */ | |
999 | #define DL_MAX_TRIES 3 | |
1000 | ||
1001 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | |
1002 | { | |
1003 | if (!task_running(rq, p) && | |
1004 | (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && | |
1005 | (p->nr_cpus_allowed > 1)) | |
1006 | return 1; | |
1007 | ||
1008 | return 0; | |
1009 | } | |
1010 | ||
1011 | /* Returns the second earliest -deadline task, NULL otherwise */ | |
1012 | static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu) | |
1013 | { | |
1014 | struct rb_node *next_node = rq->dl.rb_leftmost; | |
1015 | struct sched_dl_entity *dl_se; | |
1016 | struct task_struct *p = NULL; | |
1017 | ||
1018 | next_node: | |
1019 | next_node = rb_next(next_node); | |
1020 | if (next_node) { | |
1021 | dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node); | |
1022 | p = dl_task_of(dl_se); | |
1023 | ||
1024 | if (pick_dl_task(rq, p, cpu)) | |
1025 | return p; | |
1026 | ||
1027 | goto next_node; | |
1028 | } | |
1029 | ||
1030 | return NULL; | |
1031 | } | |
1032 | ||
1033 | static int latest_cpu_find(struct cpumask *span, | |
1034 | struct task_struct *task, | |
1035 | struct cpumask *later_mask) | |
1036 | { | |
1037 | const struct sched_dl_entity *dl_se = &task->dl; | |
1038 | int cpu, found = -1, best = 0; | |
1039 | u64 max_dl = 0; | |
1040 | ||
1041 | for_each_cpu(cpu, span) { | |
1042 | struct rq *rq = cpu_rq(cpu); | |
1043 | struct dl_rq *dl_rq = &rq->dl; | |
1044 | ||
1045 | if (cpumask_test_cpu(cpu, &task->cpus_allowed) && | |
1046 | (!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline, | |
1047 | dl_rq->earliest_dl.curr))) { | |
1048 | if (later_mask) | |
1049 | cpumask_set_cpu(cpu, later_mask); | |
1050 | if (!best && !dl_rq->dl_nr_running) { | |
1051 | best = 1; | |
1052 | found = cpu; | |
1053 | } else if (!best && | |
1054 | dl_time_before(max_dl, | |
1055 | dl_rq->earliest_dl.curr)) { | |
1056 | max_dl = dl_rq->earliest_dl.curr; | |
1057 | found = cpu; | |
1058 | } | |
1059 | } else if (later_mask) | |
1060 | cpumask_clear_cpu(cpu, later_mask); | |
1061 | } | |
1062 | ||
1063 | return found; | |
1064 | } | |
1065 | ||
1066 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); | |
1067 | ||
1068 | static int find_later_rq(struct task_struct *task) | |
1069 | { | |
1070 | struct sched_domain *sd; | |
1071 | struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); | |
1072 | int this_cpu = smp_processor_id(); | |
1073 | int best_cpu, cpu = task_cpu(task); | |
1074 | ||
1075 | /* Make sure the mask is initialized first */ | |
1076 | if (unlikely(!later_mask)) | |
1077 | return -1; | |
1078 | ||
1079 | if (task->nr_cpus_allowed == 1) | |
1080 | return -1; | |
1081 | ||
1082 | best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask); | |
1083 | if (best_cpu == -1) | |
1084 | return -1; | |
1085 | ||
1086 | /* | |
1087 | * If we are here, some target has been found, | |
1088 | * the most suitable of which is cached in best_cpu. | |
1089 | * This is, among the runqueues where the current tasks | |
1090 | * have later deadlines than the task's one, the rq | |
1091 | * with the latest possible one. | |
1092 | * | |
1093 | * Now we check how well this matches with task's | |
1094 | * affinity and system topology. | |
1095 | * | |
1096 | * The last cpu where the task run is our first | |
1097 | * guess, since it is most likely cache-hot there. | |
1098 | */ | |
1099 | if (cpumask_test_cpu(cpu, later_mask)) | |
1100 | return cpu; | |
1101 | /* | |
1102 | * Check if this_cpu is to be skipped (i.e., it is | |
1103 | * not in the mask) or not. | |
1104 | */ | |
1105 | if (!cpumask_test_cpu(this_cpu, later_mask)) | |
1106 | this_cpu = -1; | |
1107 | ||
1108 | rcu_read_lock(); | |
1109 | for_each_domain(cpu, sd) { | |
1110 | if (sd->flags & SD_WAKE_AFFINE) { | |
1111 | ||
1112 | /* | |
1113 | * If possible, preempting this_cpu is | |
1114 | * cheaper than migrating. | |
1115 | */ | |
1116 | if (this_cpu != -1 && | |
1117 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { | |
1118 | rcu_read_unlock(); | |
1119 | return this_cpu; | |
1120 | } | |
1121 | ||
1122 | /* | |
1123 | * Last chance: if best_cpu is valid and is | |
1124 | * in the mask, that becomes our choice. | |
1125 | */ | |
1126 | if (best_cpu < nr_cpu_ids && | |
1127 | cpumask_test_cpu(best_cpu, sched_domain_span(sd))) { | |
1128 | rcu_read_unlock(); | |
1129 | return best_cpu; | |
1130 | } | |
1131 | } | |
1132 | } | |
1133 | rcu_read_unlock(); | |
1134 | ||
1135 | /* | |
1136 | * At this point, all our guesses failed, we just return | |
1137 | * 'something', and let the caller sort the things out. | |
1138 | */ | |
1139 | if (this_cpu != -1) | |
1140 | return this_cpu; | |
1141 | ||
1142 | cpu = cpumask_any(later_mask); | |
1143 | if (cpu < nr_cpu_ids) | |
1144 | return cpu; | |
1145 | ||
1146 | return -1; | |
1147 | } | |
1148 | ||
1149 | /* Locks the rq it finds */ | |
1150 | static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | |
1151 | { | |
1152 | struct rq *later_rq = NULL; | |
1153 | int tries; | |
1154 | int cpu; | |
1155 | ||
1156 | for (tries = 0; tries < DL_MAX_TRIES; tries++) { | |
1157 | cpu = find_later_rq(task); | |
1158 | ||
1159 | if ((cpu == -1) || (cpu == rq->cpu)) | |
1160 | break; | |
1161 | ||
1162 | later_rq = cpu_rq(cpu); | |
1163 | ||
1164 | /* Retry if something changed. */ | |
1165 | if (double_lock_balance(rq, later_rq)) { | |
1166 | if (unlikely(task_rq(task) != rq || | |
1167 | !cpumask_test_cpu(later_rq->cpu, | |
1168 | &task->cpus_allowed) || | |
1169 | task_running(rq, task) || !task->on_rq)) { | |
1170 | double_unlock_balance(rq, later_rq); | |
1171 | later_rq = NULL; | |
1172 | break; | |
1173 | } | |
1174 | } | |
1175 | ||
1176 | /* | |
1177 | * If the rq we found has no -deadline task, or | |
1178 | * its earliest one has a later deadline than our | |
1179 | * task, the rq is a good one. | |
1180 | */ | |
1181 | if (!later_rq->dl.dl_nr_running || | |
1182 | dl_time_before(task->dl.deadline, | |
1183 | later_rq->dl.earliest_dl.curr)) | |
1184 | break; | |
1185 | ||
1186 | /* Otherwise we try again. */ | |
1187 | double_unlock_balance(rq, later_rq); | |
1188 | later_rq = NULL; | |
1189 | } | |
1190 | ||
1191 | return later_rq; | |
1192 | } | |
1193 | ||
1194 | static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) | |
1195 | { | |
1196 | struct task_struct *p; | |
1197 | ||
1198 | if (!has_pushable_dl_tasks(rq)) | |
1199 | return NULL; | |
1200 | ||
1201 | p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, | |
1202 | struct task_struct, pushable_dl_tasks); | |
1203 | ||
1204 | BUG_ON(rq->cpu != task_cpu(p)); | |
1205 | BUG_ON(task_current(rq, p)); | |
1206 | BUG_ON(p->nr_cpus_allowed <= 1); | |
1207 | ||
1208 | BUG_ON(!p->se.on_rq); | |
1209 | BUG_ON(!dl_task(p)); | |
1210 | ||
1211 | return p; | |
1212 | } | |
1213 | ||
1214 | /* | |
1215 | * See if the non running -deadline tasks on this rq | |
1216 | * can be sent to some other CPU where they can preempt | |
1217 | * and start executing. | |
1218 | */ | |
1219 | static int push_dl_task(struct rq *rq) | |
1220 | { | |
1221 | struct task_struct *next_task; | |
1222 | struct rq *later_rq; | |
1223 | ||
1224 | if (!rq->dl.overloaded) | |
1225 | return 0; | |
1226 | ||
1227 | next_task = pick_next_pushable_dl_task(rq); | |
1228 | if (!next_task) | |
1229 | return 0; | |
1230 | ||
1231 | retry: | |
1232 | if (unlikely(next_task == rq->curr)) { | |
1233 | WARN_ON(1); | |
1234 | return 0; | |
1235 | } | |
1236 | ||
1237 | /* | |
1238 | * If next_task preempts rq->curr, and rq->curr | |
1239 | * can move away, it makes sense to just reschedule | |
1240 | * without going further in pushing next_task. | |
1241 | */ | |
1242 | if (dl_task(rq->curr) && | |
1243 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | |
1244 | rq->curr->nr_cpus_allowed > 1) { | |
1245 | resched_task(rq->curr); | |
1246 | return 0; | |
1247 | } | |
1248 | ||
1249 | /* We might release rq lock */ | |
1250 | get_task_struct(next_task); | |
1251 | ||
1252 | /* Will lock the rq it'll find */ | |
1253 | later_rq = find_lock_later_rq(next_task, rq); | |
1254 | if (!later_rq) { | |
1255 | struct task_struct *task; | |
1256 | ||
1257 | /* | |
1258 | * We must check all this again, since | |
1259 | * find_lock_later_rq releases rq->lock and it is | |
1260 | * then possible that next_task has migrated. | |
1261 | */ | |
1262 | task = pick_next_pushable_dl_task(rq); | |
1263 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | |
1264 | /* | |
1265 | * The task is still there. We don't try | |
1266 | * again, some other cpu will pull it when ready. | |
1267 | */ | |
1268 | dequeue_pushable_dl_task(rq, next_task); | |
1269 | goto out; | |
1270 | } | |
1271 | ||
1272 | if (!task) | |
1273 | /* No more tasks */ | |
1274 | goto out; | |
1275 | ||
1276 | put_task_struct(next_task); | |
1277 | next_task = task; | |
1278 | goto retry; | |
1279 | } | |
1280 | ||
1281 | deactivate_task(rq, next_task, 0); | |
1282 | set_task_cpu(next_task, later_rq->cpu); | |
1283 | activate_task(later_rq, next_task, 0); | |
1284 | ||
1285 | resched_task(later_rq->curr); | |
1286 | ||
1287 | double_unlock_balance(rq, later_rq); | |
1288 | ||
1289 | out: | |
1290 | put_task_struct(next_task); | |
1291 | ||
1292 | return 1; | |
1293 | } | |
1294 | ||
/*
 * Keep calling push_dl_task() on @rq until it returns 0.
 */
static void push_dl_tasks(struct rq *rq)
{
	/* Terminates as it moves a -deadline task */
	for (;;) {
		if (!push_dl_task(rq))
			break;
	}
}
1301 | ||
1baca4ce JL |
1302 | static int pull_dl_task(struct rq *this_rq) |
1303 | { | |
1304 | int this_cpu = this_rq->cpu, ret = 0, cpu; | |
1305 | struct task_struct *p; | |
1306 | struct rq *src_rq; | |
1307 | u64 dmin = LONG_MAX; | |
1308 | ||
1309 | if (likely(!dl_overloaded(this_rq))) | |
1310 | return 0; | |
1311 | ||
1312 | /* | |
1313 | * Match the barrier from dl_set_overloaded; this guarantees that if we | |
1314 | * see overloaded we must also see the dlo_mask bit. | |
1315 | */ | |
1316 | smp_rmb(); | |
1317 | ||
1318 | for_each_cpu(cpu, this_rq->rd->dlo_mask) { | |
1319 | if (this_cpu == cpu) | |
1320 | continue; | |
1321 | ||
1322 | src_rq = cpu_rq(cpu); | |
1323 | ||
1324 | /* | |
1325 | * It looks racy, abd it is! However, as in sched_rt.c, | |
1326 | * we are fine with this. | |
1327 | */ | |
1328 | if (this_rq->dl.dl_nr_running && | |
1329 | dl_time_before(this_rq->dl.earliest_dl.curr, | |
1330 | src_rq->dl.earliest_dl.next)) | |
1331 | continue; | |
1332 | ||
1333 | /* Might drop this_rq->lock */ | |
1334 | double_lock_balance(this_rq, src_rq); | |
1335 | ||
1336 | /* | |
1337 | * If there are no more pullable tasks on the | |
1338 | * rq, we're done with it. | |
1339 | */ | |
1340 | if (src_rq->dl.dl_nr_running <= 1) | |
1341 | goto skip; | |
1342 | ||
1343 | p = pick_next_earliest_dl_task(src_rq, this_cpu); | |
1344 | ||
1345 | /* | |
1346 | * We found a task to be pulled if: | |
1347 | * - it preempts our current (if there's one), | |
1348 | * - it will preempt the last one we pulled (if any). | |
1349 | */ | |
1350 | if (p && dl_time_before(p->dl.deadline, dmin) && | |
1351 | (!this_rq->dl.dl_nr_running || | |
1352 | dl_time_before(p->dl.deadline, | |
1353 | this_rq->dl.earliest_dl.curr))) { | |
1354 | WARN_ON(p == src_rq->curr); | |
1355 | WARN_ON(!p->se.on_rq); | |
1356 | ||
1357 | /* | |
1358 | * Then we pull iff p has actually an earlier | |
1359 | * deadline than the current task of its runqueue. | |
1360 | */ | |
1361 | if (dl_time_before(p->dl.deadline, | |
1362 | src_rq->curr->dl.deadline)) | |
1363 | goto skip; | |
1364 | ||
1365 | ret = 1; | |
1366 | ||
1367 | deactivate_task(src_rq, p, 0); | |
1368 | set_task_cpu(p, this_cpu); | |
1369 | activate_task(this_rq, p, 0); | |
1370 | dmin = p->dl.deadline; | |
1371 | ||
1372 | /* Is there any other task even earlier? */ | |
1373 | } | |
1374 | skip: | |
1375 | double_unlock_balance(this_rq, src_rq); | |
1376 | } | |
1377 | ||
1378 | return ret; | |
1379 | } | |
1380 | ||
/*
 * If the task being scheduled out is a -deadline one, try to pull
 * other tasks here.
 */
static void pre_schedule_dl(struct rq *rq, struct task_struct *prev)
{
	if (!dl_task(prev))
		return;

	pull_dl_task(rq);
}
1387 | ||
/*
 * Push away whatever -deadline task can be pushed from @rq.
 */
static void post_schedule_dl(struct rq *rq)
{
	push_dl_tasks(rq);
}
1392 | ||
1393 | /* | |
1394 | * Since the task is not running and a reschedule is not going to happen | |
1395 | * anytime soon on its runqueue, we try pushing it away now. | |
1396 | */ | |
1397 | static void task_woken_dl(struct rq *rq, struct task_struct *p) | |
1398 | { | |
1399 | if (!task_running(rq, p) && | |
1400 | !test_tsk_need_resched(rq->curr) && | |
1401 | has_pushable_dl_tasks(rq) && | |
1402 | p->nr_cpus_allowed > 1 && | |
1403 | dl_task(rq->curr) && | |
1404 | (rq->curr->nr_cpus_allowed < 2 || | |
1405 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { | |
1406 | push_dl_tasks(rq); | |
1407 | } | |
1408 | } | |
1409 | ||
1410 | static void set_cpus_allowed_dl(struct task_struct *p, | |
1411 | const struct cpumask *new_mask) | |
1412 | { | |
1413 | struct rq *rq; | |
1414 | int weight; | |
1415 | ||
1416 | BUG_ON(!dl_task(p)); | |
1417 | ||
1418 | /* | |
1419 | * Update only if the task is actually running (i.e., | |
1420 | * it is on the rq AND it is not throttled). | |
1421 | */ | |
1422 | if (!on_dl_rq(&p->dl)) | |
1423 | return; | |
1424 | ||
1425 | weight = cpumask_weight(new_mask); | |
1426 | ||
1427 | /* | |
1428 | * Only update if the process changes its state from whether it | |
1429 | * can migrate or not. | |
1430 | */ | |
1431 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) | |
1432 | return; | |
1433 | ||
1434 | rq = task_rq(p); | |
1435 | ||
1436 | /* | |
1437 | * The process used to be able to migrate OR it can now migrate | |
1438 | */ | |
1439 | if (weight <= 1) { | |
1440 | if (!task_current(rq, p)) | |
1441 | dequeue_pushable_dl_task(rq, p); | |
1442 | BUG_ON(!rq->dl.dl_nr_migratory); | |
1443 | rq->dl.dl_nr_migratory--; | |
1444 | } else { | |
1445 | if (!task_current(rq, p)) | |
1446 | enqueue_pushable_dl_task(rq, p); | |
1447 | rq->dl.dl_nr_migratory++; | |
1448 | } | |
1449 | ||
1450 | update_dl_migration(&rq->dl); | |
1451 | } | |
1452 | ||
1453 | /* Assumes rq->lock is held */ | |
1454 | static void rq_online_dl(struct rq *rq) | |
1455 | { | |
1456 | if (rq->dl.overloaded) | |
1457 | dl_set_overload(rq); | |
1458 | } | |
1459 | ||
1460 | /* Assumes rq->lock is held */ | |
1461 | static void rq_offline_dl(struct rq *rq) | |
1462 | { | |
1463 | if (rq->dl.overloaded) | |
1464 | dl_clear_overload(rq); | |
1465 | } | |
1466 | ||
1467 | void init_sched_dl_class(void) | |
1468 | { | |
1469 | unsigned int i; | |
1470 | ||
1471 | for_each_possible_cpu(i) | |
1472 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), | |
1473 | GFP_KERNEL, cpu_to_node(i)); | |
1474 | } | |
1475 | ||
1476 | #endif /* CONFIG_SMP */ | |
1477 | ||
aab03e05 DF |
1478 | static void switched_from_dl(struct rq *rq, struct task_struct *p) |
1479 | { | |
1baca4ce | 1480 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) |
aab03e05 | 1481 | hrtimer_try_to_cancel(&p->dl.dl_timer); |
1baca4ce JL |
1482 | |
1483 | #ifdef CONFIG_SMP | |
1484 | /* | |
1485 | * Since this might be the only -deadline task on the rq, | |
1486 | * this is the right place to try to pull some other one | |
1487 | * from an overloaded cpu, if any. | |
1488 | */ | |
1489 | if (!rq->dl.dl_nr_running) | |
1490 | pull_dl_task(rq); | |
1491 | #endif | |
aab03e05 DF |
1492 | } |
1493 | ||
1baca4ce JL |
1494 | /* |
1495 | * When switching to -deadline, we may overload the rq, then | |
1496 | * we try to push someone off, if possible. | |
1497 | */ | |
aab03e05 DF |
1498 | static void switched_to_dl(struct rq *rq, struct task_struct *p) |
1499 | { | |
1baca4ce JL |
1500 | int check_resched = 1; |
1501 | ||
aab03e05 DF |
1502 | /* |
1503 | * If p is throttled, don't consider the possibility | |
1504 | * of preempting rq->curr, the check will be done right | |
1505 | * after its runtime will get replenished. | |
1506 | */ | |
1507 | if (unlikely(p->dl.dl_throttled)) | |
1508 | return; | |
1509 | ||
1510 | if (p->on_rq || rq->curr != p) { | |
1baca4ce JL |
1511 | #ifdef CONFIG_SMP |
1512 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | |
1513 | /* Only reschedule if pushing failed */ | |
1514 | check_resched = 0; | |
1515 | #endif /* CONFIG_SMP */ | |
1516 | if (check_resched && task_has_dl_policy(rq->curr)) | |
aab03e05 | 1517 | check_preempt_curr_dl(rq, p, 0); |
aab03e05 DF |
1518 | } |
1519 | } | |
1520 | ||
1baca4ce JL |
1521 | /* |
1522 | * If the scheduling parameters of a -deadline task changed, | |
1523 | * a push or pull operation might be needed. | |
1524 | */ | |
aab03e05 DF |
1525 | static void prio_changed_dl(struct rq *rq, struct task_struct *p, |
1526 | int oldprio) | |
1527 | { | |
1baca4ce | 1528 | if (p->on_rq || rq->curr == p) { |
aab03e05 | 1529 | #ifdef CONFIG_SMP |
1baca4ce JL |
1530 | /* |
1531 | * This might be too much, but unfortunately | |
1532 | * we don't have the old deadline value, and | |
1533 | * we can't argue if the task is increasing | |
1534 | * or lowering its prio, so... | |
1535 | */ | |
1536 | if (!rq->dl.overloaded) | |
1537 | pull_dl_task(rq); | |
1538 | ||
1539 | /* | |
1540 | * If we now have a earlier deadline task than p, | |
1541 | * then reschedule, provided p is still on this | |
1542 | * runqueue. | |
1543 | */ | |
1544 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && | |
1545 | rq->curr == p) | |
1546 | resched_task(p); | |
1547 | #else | |
1548 | /* | |
1549 | * Again, we don't know if p has a earlier | |
1550 | * or later deadline, so let's blindly set a | |
1551 | * (maybe not needed) rescheduling point. | |
1552 | */ | |
1553 | resched_task(p); | |
1554 | #endif /* CONFIG_SMP */ | |
1555 | } else | |
1556 | switched_to_dl(rq, p); | |
aab03e05 | 1557 | } |
aab03e05 DF |
1558 | |
1559 | const struct sched_class dl_sched_class = { | |
1560 | .next = &rt_sched_class, | |
1561 | .enqueue_task = enqueue_task_dl, | |
1562 | .dequeue_task = dequeue_task_dl, | |
1563 | .yield_task = yield_task_dl, | |
1564 | ||
1565 | .check_preempt_curr = check_preempt_curr_dl, | |
1566 | ||
1567 | .pick_next_task = pick_next_task_dl, | |
1568 | .put_prev_task = put_prev_task_dl, | |
1569 | ||
1570 | #ifdef CONFIG_SMP | |
1571 | .select_task_rq = select_task_rq_dl, | |
1baca4ce JL |
1572 | .set_cpus_allowed = set_cpus_allowed_dl, |
1573 | .rq_online = rq_online_dl, | |
1574 | .rq_offline = rq_offline_dl, | |
1575 | .pre_schedule = pre_schedule_dl, | |
1576 | .post_schedule = post_schedule_dl, | |
1577 | .task_woken = task_woken_dl, | |
aab03e05 DF |
1578 | #endif |
1579 | ||
1580 | .set_curr_task = set_curr_task_dl, | |
1581 | .task_tick = task_tick_dl, | |
1582 | .task_fork = task_fork_dl, | |
1583 | .task_dead = task_dead_dl, | |
1584 | ||
1585 | .prio_changed = prio_changed_dl, | |
1586 | .switched_from = switched_from_dl, | |
1587 | .switched_to = switched_to_dl, | |
1588 | }; |