posix-cpu-timers: Sample directly in timer check
[linux-2.6-block.git] / kernel / time / posix-cpu-timers.c
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * Implement CPU time clocks for the POSIX clock interface.
4 */
5
3f07c014 6#include <linux/sched/signal.h>
32ef5517 7#include <linux/sched/cputime.h>
1da177e4 8#include <linux/posix-timers.h>
1da177e4 9#include <linux/errno.h>
f8bd2258 10#include <linux/math64.h>
7c0f6ba6 11#include <linux/uaccess.h>
bb34d92f 12#include <linux/kernel_stat.h>
3f0a525e 13#include <trace/events/timer.h>
a8572160
FW
14#include <linux/tick.h>
15#include <linux/workqueue.h>
edbeda46 16#include <linux/compat.h>
34be3930 17#include <linux/sched/deadline.h>
1da177e4 18
bab0aae9
TG
19#include "posix-timers.h"
20
f37fb0aa
TG
21static void posix_cpu_timer_rearm(struct k_itimer *timer);
22
f06febc9 23/*
f55db609
SG
24 * Called after updating RLIMIT_CPU to update the process CPU timer and the
25 * tsk->signal->cputime_expires expiration cache if necessary. Needs
26 * siglock protection since other code may update expiration cache as
27 * well.
f06febc9 28 */
5ab46b34 29void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
f06febc9 30{
858cf3a8 31 u64 nsecs = rlim_new * NSEC_PER_SEC;
f06febc9 32
5ab46b34 33 spin_lock_irq(&task->sighand->siglock);
858cf3a8 34 set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
5ab46b34 35 spin_unlock_irq(&task->sighand->siglock);
f06febc9
FM
36}
37
6ae40e3f
TG
38/*
39 * Functions for validating access to tasks.
40 */
41static struct task_struct *lookup_task(const pid_t pid, bool thread)
1da177e4 42{
1da177e4 43 struct task_struct *p;
1da177e4 44
6ae40e3f
TG
45 if (!pid)
46 return thread ? current : current->group_leader;
1da177e4 47
6ae40e3f
TG
48 p = find_task_by_vpid(pid);
49 if (!p || p == current)
50 return p;
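	/* A per-thread clock may only name a thread in the caller's own thread group. */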
51 if (thread)
52 return same_thread_group(p, current) ? p : NULL;
53 if (p == current)
54 return p;
55 return has_group_leader_pid(p) ? p : NULL;
56}
57
58static struct task_struct *__get_task_for_clock(const clockid_t clock,
59 bool getref)
60{
61 const bool thread = !!CPUCLOCK_PERTHREAD(clock);
62 const pid_t pid = CPUCLOCK_PID(clock);
63 struct task_struct *p;
64
65 if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
66 return NULL;
1da177e4 67
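	/*
	 * The lookup and the optional reference bump must happen under
	 * rcu_read_lock() so the found task cannot be released in between.
	 */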
c0deae8c 68 rcu_read_lock();
6ae40e3f
TG
69 p = lookup_task(pid, thread);
70 if (p && getref)
71 get_task_struct(p);
c0deae8c 72 rcu_read_unlock();
6ae40e3f
TG
73 return p;
74}
1da177e4 75
6ae40e3f
TG
76static inline struct task_struct *get_task_for_clock(const clockid_t clock)
77{
78 return __get_task_for_clock(clock, true);
79}
80
81static inline int validate_clock_permissions(const clockid_t clock)
82{
83 return __get_task_for_clock(clock, false) ? 0 : -EINVAL;
1da177e4
LT
84}
85
1da177e4
LT
86/*
87 * Update expiry time from increment, and increase overrun count,
88 * given the current clock sample.
89 */
ebd7e7fc 90static void bump_cpu_timer(struct k_itimer *timer, u64 now)
1da177e4
LT
91{
92 int i;
ebd7e7fc 93 u64 delta, incr;
1da177e4 94
16118794 95 if (!timer->it_interval)
1da177e4
LT
96 return;
97
55ccb616
FW
98 if (now < timer->it.cpu.expires)
99 return;
1da177e4 100
16118794 101 incr = timer->it_interval;
55ccb616 102 delta = now + incr - timer->it.cpu.expires;
1da177e4 103
55ccb616
FW
104 /* Don't use (incr*2 < delta), incr*2 might overflow. */
105 for (i = 0; incr < delta - incr; i++)
106 incr = incr << 1;
107
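	/*
	 * Walk the doubled increment back down, adding each multiple that
	 * still fits into the remaining delta and accounting 1 << i
	 * overruns for it.
	 */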
108 for (; i >= 0; incr >>= 1, i--) {
109 if (delta < incr)
110 continue;
111
112 timer->it.cpu.expires += incr;
78c9c4df 113 timer->it_overrun += 1LL << i;
55ccb616 114 delta -= incr;
1da177e4
LT
115 }
116}
117
555347f6
FW
118/**
119 * task_cputime_zero - Check a task_cputime struct for all zero fields.
120 *
121 * @cputime: The struct to compare.
122 *
123 * Checks @cputime to see if all fields are zero. Returns true if all fields
124 * are zero, false if any field is nonzero.
125 */
ebd7e7fc 126static inline int task_cputime_zero(const struct task_cputime *cputime)
555347f6
FW
127{
128 if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
129 return 1;
130 return 0;
131}
132
ebd7e7fc 133static inline u64 prof_ticks(struct task_struct *p)
1da177e4 134{
ebd7e7fc 135 u64 utime, stime;
6fac4829 136
ebd7e7fc 137 task_cputime(p, &utime, &stime);
6fac4829 138
ebd7e7fc 139 return utime + stime;
1da177e4 140}
ebd7e7fc 141static inline u64 virt_ticks(struct task_struct *p)
1da177e4 142{
ebd7e7fc 143 u64 utime, stime;
6fac4829 144
ebd7e7fc 145 task_cputime(p, &utime, &stime);
6fac4829 146
ebd7e7fc 147 return utime;
1da177e4 148}
1da177e4 149
bc2c8ea4 150static int
d2e3e0ca 151posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
1da177e4 152{
6ae40e3f
TG
153 int error = validate_clock_permissions(which_clock);
154
1da177e4
LT
155 if (!error) {
156 tp->tv_sec = 0;
157 tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
158 if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
159 /*
160 * If sched_clock is using a cycle counter, we
161 * have no way to export its true resolution, but it is
162 * certainly much finer than 1s/HZ.
163 */
164 tp->tv_nsec = 1;
165 }
166 }
167 return error;
168}
169
bc2c8ea4 170static int
6ae40e3f 171posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
1da177e4 172{
6ae40e3f
TG
173 int error = validate_clock_permissions(clock);
174
1da177e4
LT
175 /*
176 * You can never reset a CPU clock, but we check for other errors
177 * in the call before failing with EPERM.
178 */
6ae40e3f 179 return error ? : -EPERM;
1da177e4
LT
180}
181
1da177e4
LT
182/*
183 * Sample a per-thread clock for the given task.
184 */
ebd7e7fc
FW
185static int cpu_clock_sample(const clockid_t which_clock,
186 struct task_struct *p, u64 *sample)
1da177e4
LT
187{
188 switch (CPUCLOCK_WHICH(which_clock)) {
189 default:
190 return -EINVAL;
191 case CPUCLOCK_PROF:
55ccb616 192 *sample = prof_ticks(p);
1da177e4
LT
193 break;
194 case CPUCLOCK_VIRT:
55ccb616 195 *sample = virt_ticks(p);
1da177e4
LT
196 break;
197 case CPUCLOCK_SCHED:
55ccb616 198 *sample = task_sched_runtime(p);
1da177e4
LT
199 break;
200 }
201 return 0;
202}
203
1018016c
JL
204/*
205 * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
206 * to avoid race conditions with concurrent updates to cputime.
207 */
208static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
4da94d49 209{
1018016c
JL
210 u64 curr_cputime;
211retry:
212 curr_cputime = atomic64_read(cputime);
213 if (sum_cputime > curr_cputime) {
214 if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
215 goto retry;
216 }
217}
4da94d49 218
ebd7e7fc 219static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
1018016c 220{
71107445
JL
221 __update_gt_cputime(&cputime_atomic->utime, sum->utime);
222 __update_gt_cputime(&cputime_atomic->stime, sum->stime);
223 __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
1018016c 224}
4da94d49 225
71107445 226/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
ebd7e7fc 227static inline void sample_cputime_atomic(struct task_cputime *times,
71107445 228 struct task_cputime_atomic *atomic_times)
1018016c 229{
71107445
JL
230 times->utime = atomic64_read(&atomic_times->utime);
231 times->stime = atomic64_read(&atomic_times->stime);
232 times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
4da94d49
PZ
233}
234
19298fbf
TG
235/**
236 * thread_group_sample_cputime - Sample cputime for a given task
237 * @tsk: Task for which cputime needs to be sampled
238 * @times: Storage for time samples
239 *
240 * Called from sys_getitimer() to calculate the expiry time of an active
241 * timer. That means group cputime accounting is already active. Called
242 * with task sighand lock held.
243 *
244 * Updates @times with an up-to-date sample of the thread group cputimes.
245 */
246void thread_group_sample_cputime(struct task_struct *tsk,
247 struct task_cputime *times)
248{
249 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
250
251 WARN_ON_ONCE(!cputimer->running);
252
253 sample_cputime_atomic(times, &cputimer->cputime_atomic);
254}
255
ebd7e7fc 256void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
4da94d49
PZ
257{
258 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
ebd7e7fc 259 struct task_cputime sum;
4da94d49 260
1018016c
JL
261 /* Check if cputimer isn't running. This is accessed without locking. */
262 if (!READ_ONCE(cputimer->running)) {
4da94d49
PZ
263 /*
264 * The POSIX timer interface allows for absolute time expiry
265 * values through the TIMER_ABSTIME flag, therefore we have
1018016c 266 * to synchronize the timer to the clock every time we start it.
4da94d49 267 */
ebd7e7fc 268 thread_group_cputime(tsk, &sum);
71107445 269 update_gt_cputime(&cputimer->cputime_atomic, &sum);
1018016c
JL
270
271 /*
272 * We're setting cputimer->running without a lock. Ensure
273 * this only gets written to in one operation. We set
274 * running after update_gt_cputime() as a small optimization,
275 * but barriers are not required because update_gt_cputime()
276 * can handle concurrent updates.
277 */
d5c373eb 278 WRITE_ONCE(cputimer->running, true);
1018016c 279 }
71107445 280 sample_cputime_atomic(times, &cputimer->cputime_atomic);
4da94d49
PZ
281}
282
1da177e4
LT
283/*
284 * Sample a process (thread group) clock for the given group_leader task.
e73d84e3
FW
285 * Must be called with task sighand lock held for safe while_each_thread()
286 * traversal.
1da177e4 287 */
bb34d92f
FM
288static int cpu_clock_sample_group(const clockid_t which_clock,
289 struct task_struct *p,
ebd7e7fc 290 u64 *sample)
1da177e4 291{
ebd7e7fc 292 struct task_cputime cputime;
f06febc9 293
eccdaeaf 294 switch (CPUCLOCK_WHICH(which_clock)) {
1da177e4
LT
295 default:
296 return -EINVAL;
297 case CPUCLOCK_PROF:
ebd7e7fc
FW
298 thread_group_cputime(p, &cputime);
299 *sample = cputime.utime + cputime.stime;
1da177e4
LT
300 break;
301 case CPUCLOCK_VIRT:
ebd7e7fc
FW
302 thread_group_cputime(p, &cputime);
303 *sample = cputime.utime;
1da177e4
LT
304 break;
305 case CPUCLOCK_SCHED:
ebd7e7fc 306 thread_group_cputime(p, &cputime);
55ccb616 307 *sample = cputime.sum_exec_runtime;
1da177e4
LT
308 break;
309 }
310 return 0;
311}
312
bfcf3e92 313static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
33ab0fec 314{
bfcf3e92
TG
315 const clockid_t clkid = CPUCLOCK_WHICH(clock);
316 struct task_struct *tsk;
317 u64 t;
33ab0fec 318
bfcf3e92
TG
319 tsk = get_task_for_clock(clock);
320 if (!tsk)
321 return -EINVAL;
1da177e4 322
bfcf3e92
TG
323 if (CPUCLOCK_PERTHREAD(clock))
324 cpu_clock_sample(clkid, tsk, &t);
325 else
326 cpu_clock_sample_group(clkid, tsk, &t);
327 put_task_struct(tsk);
1da177e4 328
bfcf3e92
TG
329 *tp = ns_to_timespec64(t);
330 return 0;
1da177e4
LT
331}
332
1da177e4
LT
333/*
334 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
ba5ea951
SG
335 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
336 * new timer already all-zeros initialized.
1da177e4 337 */
bc2c8ea4 338static int posix_cpu_timer_create(struct k_itimer *new_timer)
1da177e4 339{
e5a8b65b 340 struct task_struct *p = get_task_for_clock(new_timer->it_clock);
1da177e4 341
e5a8b65b 342 if (!p)
1da177e4
LT
343 return -EINVAL;
344
d97bb75d 345 new_timer->kclock = &clock_posix_cpu;
1da177e4 346 INIT_LIST_HEAD(&new_timer->it.cpu.entry);
1da177e4 347 new_timer->it.cpu.task = p;
e5a8b65b 348 return 0;
1da177e4
LT
349}
350
351/*
352 * Clean up a CPU-clock timer that is about to be destroyed.
353 * This is called from timer deletion with the timer already locked.
354 * If we return TIMER_RETRY, it's necessary to release the timer's lock
355 * and try again. (This happens when the timer is in the middle of firing.)
356 */
bc2c8ea4 357static int posix_cpu_timer_del(struct k_itimer *timer)
1da177e4 358{
108150ea 359 int ret = 0;
3d7a1427
FW
360 unsigned long flags;
361 struct sighand_struct *sighand;
362 struct task_struct *p = timer->it.cpu.task;
1da177e4 363
692117c1
TG
364 if (WARN_ON_ONCE(!p))
365 return -EINVAL;
108150ea 366
3d7a1427
FW
367 /*
368 * Protect against sighand release/switch in exit/exec and process/
369 * thread timer list entry concurrent read/writes.
370 */
371 sighand = lock_task_sighand(p, &flags);
372 if (unlikely(sighand == NULL)) {
a3222f88
FW
373 /*
374 * We raced with the reaping of the task.
375 * The deletion should have cleared us off the list.
376 */
531f64fd 377 WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
a3222f88 378 } else {
a3222f88
FW
379 if (timer->it.cpu.firing)
380 ret = TIMER_RETRY;
381 else
382 list_del(&timer->it.cpu.entry);
3d7a1427
FW
383
384 unlock_task_sighand(p, &flags);
1da177e4 385 }
a3222f88
FW
386
387 if (!ret)
388 put_task_struct(p);
1da177e4 389
108150ea 390 return ret;
1da177e4
LT
391}
392
af82eb3c 393static void cleanup_timers_list(struct list_head *head)
1a7fa510
FW
394{
395 struct cpu_timer_list *timer, *next;
396
a0b2062b 397 list_for_each_entry_safe(timer, next, head, entry)
1a7fa510 398 list_del_init(&timer->entry);
1a7fa510
FW
399}
400
1da177e4 401/*
7cb9a94c
TG
402 * Clean out CPU timers which are still armed when a thread exits. The
403 * timers are only removed from the list. No other updates are done. The
404 * corresponding posix timers are still accessible, but cannot be rearmed.
405 *
1da177e4
LT
406 * This must be called with the siglock held.
407 */
af82eb3c 408static void cleanup_timers(struct list_head *head)
1da177e4 409{
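	/*
	 * head points at the CPUCLOCK_PROF list; the CPUCLOCK_VIRT and
	 * CPUCLOCK_SCHED lists follow it in the array, hence the ++head.
	 */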
af82eb3c
FW
410 cleanup_timers_list(head);
411 cleanup_timers_list(++head);
412 cleanup_timers_list(++head);
1da177e4
LT
413}
414
415/*
416 * These are both called with the siglock held, when the current thread
417 * is being reaped. When the final (leader) thread in the group is reaped,
418 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
419 */
420void posix_cpu_timers_exit(struct task_struct *tsk)
421{
af82eb3c 422 cleanup_timers(tsk->cpu_timers);
1da177e4
LT
423}
424void posix_cpu_timers_exit_group(struct task_struct *tsk)
425{
af82eb3c 426 cleanup_timers(tsk->signal->cpu_timers);
1da177e4
LT
427}
428
ebd7e7fc 429static inline int expires_gt(u64 expires, u64 new_exp)
d1e3b6d1 430{
64861634 431 return expires == 0 || expires > new_exp;
d1e3b6d1
SG
432}
433
1da177e4
LT
434/*
435 * Insert the timer on the appropriate list before any timers that
e73d84e3 436 * expire later. This must be called with the sighand lock held.
1da177e4 437 */
5eb9aa64 438static void arm_timer(struct k_itimer *timer)
1da177e4
LT
439{
440 struct task_struct *p = timer->it.cpu.task;
441 struct list_head *head, *listpos;
ebd7e7fc 442 struct task_cputime *cputime_expires;
1da177e4
LT
443 struct cpu_timer_list *const nt = &timer->it.cpu;
444 struct cpu_timer_list *next;
1da177e4 445
5eb9aa64
SG
446 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
447 head = p->cpu_timers;
448 cputime_expires = &p->cputime_expires;
449 } else {
450 head = p->signal->cpu_timers;
451 cputime_expires = &p->signal->cputime_expires;
452 }
1da177e4
LT
453 head += CPUCLOCK_WHICH(timer->it_clock);
454
1da177e4 455 listpos = head;
5eb9aa64 456 list_for_each_entry(next, head, entry) {
55ccb616 457 if (nt->expires < next->expires)
5eb9aa64
SG
458 break;
459 listpos = &next->entry;
1da177e4
LT
460 }
461 list_add(&nt->entry, listpos);
462
463 if (listpos == head) {
ebd7e7fc 464 u64 exp = nt->expires;
5eb9aa64 465
1da177e4 466 /*
5eb9aa64
SG
467 * We are the new earliest-expiring POSIX 1.b timer, hence
468 * need to update expiration cache. Take into account that
469 * for process timers we share expiration cache with itimers
470 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
1da177e4
LT
471 */
472
5eb9aa64
SG
473 switch (CPUCLOCK_WHICH(timer->it_clock)) {
474 case CPUCLOCK_PROF:
ebd7e7fc
FW
475 if (expires_gt(cputime_expires->prof_exp, exp))
476 cputime_expires->prof_exp = exp;
5eb9aa64
SG
477 break;
478 case CPUCLOCK_VIRT:
ebd7e7fc
FW
479 if (expires_gt(cputime_expires->virt_exp, exp))
480 cputime_expires->virt_exp = exp;
5eb9aa64
SG
481 break;
482 case CPUCLOCK_SCHED:
ebd7e7fc 483 if (expires_gt(cputime_expires->sched_exp, exp))
55ccb616 484 cputime_expires->sched_exp = exp;
5eb9aa64 485 break;
1da177e4 486 }
b7878300
FW
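		/*
		 * An armed CPU timer needs the tick to keep running on
		 * nohz_full CPUs, so set the matching tick dependency.
		 */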
487 if (CPUCLOCK_PERTHREAD(timer->it_clock))
488 tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
489 else
490 tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
1da177e4 491 }
1da177e4
LT
492}
493
494/*
495 * The timer is locked, fire it and arrange for its reload.
496 */
497static void cpu_timer_fire(struct k_itimer *timer)
498{
1f169f84
SG
499 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
500 /*
501 * The user doesn't want any signal.
502 */
55ccb616 503 timer->it.cpu.expires = 0;
1f169f84 504 } else if (unlikely(timer->sigq == NULL)) {
1da177e4
LT
505 /*
506 * This is a special case for clock_nanosleep,
507 * not a normal timer from sys_timer_create.
508 */
509 wake_up_process(timer->it_process);
55ccb616 510 timer->it.cpu.expires = 0;
16118794 511 } else if (!timer->it_interval) {
1da177e4
LT
512 /*
513 * One-shot timer. Clear it as soon as it's fired.
514 */
515 posix_timer_event(timer, 0);
55ccb616 516 timer->it.cpu.expires = 0;
1da177e4
LT
517 } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
518 /*
519 * The signal did not get queued because the signal
520 * was ignored, so we won't get any callback to
521 * reload the timer. But we need to keep it
522 * ticking in case the signal is deliverable next time.
523 */
f37fb0aa 524 posix_cpu_timer_rearm(timer);
af888d67 525 ++timer->it_requeue_pending;
1da177e4
LT
526 }
527}
528
3997ad31
PZ
529/*
530 * Sample a process (thread group) timer for the given group_leader task.
e73d84e3
FW
531 * Must be called with task sighand lock held for safe while_each_thread()
532 * traversal.
3997ad31
PZ
533 */
534static int cpu_timer_sample_group(const clockid_t which_clock,
ebd7e7fc 535 struct task_struct *p, u64 *sample)
3997ad31 536{
ebd7e7fc 537 struct task_cputime cputime;
3997ad31
PZ
538
539 thread_group_cputimer(p, &cputime);
540 switch (CPUCLOCK_WHICH(which_clock)) {
541 default:
542 return -EINVAL;
543 case CPUCLOCK_PROF:
ebd7e7fc 544 *sample = cputime.utime + cputime.stime;
3997ad31
PZ
545 break;
546 case CPUCLOCK_VIRT:
ebd7e7fc 547 *sample = cputime.utime;
3997ad31
PZ
548 break;
549 case CPUCLOCK_SCHED:
23cfa361 550 *sample = cputime.sum_exec_runtime;
3997ad31
PZ
551 break;
552 }
553 return 0;
554}
555
1da177e4
LT
556/*
557 * Guts of sys_timer_settime for CPU timers.
558 * This is called with the timer locked and interrupts disabled.
559 * If we return TIMER_RETRY, it's necessary to release the timer's lock
560 * and try again. (This happens when the timer is in the middle of firing.)
561 */
e73d84e3 562static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
5f252b32 563 struct itimerspec64 *new, struct itimerspec64 *old)
1da177e4 564{
e73d84e3
FW
565 unsigned long flags;
566 struct sighand_struct *sighand;
1da177e4 567 struct task_struct *p = timer->it.cpu.task;
ebd7e7fc 568 u64 old_expires, new_expires, old_incr, val;
1da177e4
LT
569 int ret;
570
692117c1
TG
571 if (WARN_ON_ONCE(!p))
572 return -EINVAL;
1da177e4 573
098b0e01
TG
574 /*
575 * Use the to_ktime conversion because that clamps the maximum
576 * value to KTIME_MAX and avoid multiplication overflows.
577 */
578 new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));
1da177e4 579
1da177e4 580 /*
e73d84e3
FW
581 * Protect against sighand release/switch in exit/exec and p->cpu_timers
582 * and p->signal->cpu_timers read/write in arm_timer()
583 */
584 sighand = lock_task_sighand(p, &flags);
585 /*
586 * If p has just been reaped, we can no
1da177e4
LT
587 * longer get any information about it at all.
588 */
e73d84e3 589 if (unlikely(sighand == NULL)) {
1da177e4
LT
590 return -ESRCH;
591 }
592
593 /*
594 * Disarm any old timer after extracting its expiry time.
595 */
a69ac4a7
ON
596
597 ret = 0;
16118794 598 old_incr = timer->it_interval;
1da177e4 599 old_expires = timer->it.cpu.expires;
a69ac4a7
ON
600 if (unlikely(timer->it.cpu.firing)) {
601 timer->it.cpu.firing = -1;
602 ret = TIMER_RETRY;
603 } else
604 list_del_init(&timer->it.cpu.entry);
1da177e4
LT
605
606 /*
607 * We need to sample the clock to convert the new value
608 * between relative and absolute forms, and to convert the
609 * old value from absolute to relative. To set a process
610 * timer, we need a sample to balance the thread expiry
611 * times (in arm_timer). With an absolute time, we must
612 * check if it's already passed. In short, we need a sample.
613 */
614 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
615 cpu_clock_sample(timer->it_clock, p, &val);
616 } else {
3997ad31 617 cpu_timer_sample_group(timer->it_clock, p, &val);
1da177e4
LT
618 }
619
620 if (old) {
55ccb616 621 if (old_expires == 0) {
1da177e4
LT
622 old->it_value.tv_sec = 0;
623 old->it_value.tv_nsec = 0;
624 } else {
625 /*
626 * Update the timer in case it has
627 * overrun already. If it has,
628 * we'll report it as having overrun
629 * and with the next reloaded timer
630 * already ticking, though we are
631 * swallowing that pending
632 * notification here to install the
633 * new setting.
634 */
635 bump_cpu_timer(timer, val);
55ccb616
FW
636 if (val < timer->it.cpu.expires) {
637 old_expires = timer->it.cpu.expires - val;
5f252b32 638 old->it_value = ns_to_timespec64(old_expires);
1da177e4
LT
639 } else {
640 old->it_value.tv_nsec = 1;
641 old->it_value.tv_sec = 0;
642 }
643 }
644 }
645
a69ac4a7 646 if (unlikely(ret)) {
1da177e4
LT
647 /*
648 * We are colliding with the timer actually firing.
649 * Punt after filling in the timer's old value, and
650 * disable this firing since we are already reporting
651 * it as an overrun (thanks to bump_cpu_timer above).
652 */
e73d84e3 653 unlock_task_sighand(p, &flags);
1da177e4
LT
654 goto out;
655 }
656
e73d84e3 657 if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
55ccb616 658 new_expires += val;
1da177e4
LT
659 }
660
661 /*
662 * Install the new expiry time (or zero).
663 * For a timer with no notification action, we don't actually
664 * arm the timer (we'll just fake it for timer_gettime).
665 */
666 timer->it.cpu.expires = new_expires;
55ccb616 667 if (new_expires != 0 && val < new_expires) {
5eb9aa64 668 arm_timer(timer);
1da177e4
LT
669 }
670
e73d84e3 671 unlock_task_sighand(p, &flags);
1da177e4
LT
672 /*
673 * Install the new reload setting, and
674 * set up the signal and overrun bookkeeping.
675 */
16118794 676 timer->it_interval = timespec64_to_ktime(new->it_interval);
1da177e4
LT
677
678 /*
679 * This acts as a modification timestamp for the timer,
680 * so any automatic reload attempt will punt on seeing
681 * that we have reset the timer manually.
682 */
683 timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
684 ~REQUEUE_PENDING;
685 timer->it_overrun_last = 0;
686 timer->it_overrun = -1;
687
55ccb616 688 if (new_expires != 0 && !(val < new_expires)) {
1da177e4
LT
689 /*
690 * The designated time already passed, so we notify
691 * immediately, even if the thread never runs to
692 * accumulate more time on this clock.
693 */
694 cpu_timer_fire(timer);
695 }
696
697 ret = 0;
698 out:
ebd7e7fc 699 if (old)
5f252b32 700 old->it_interval = ns_to_timespec64(old_incr);
b7878300 701
1da177e4
LT
702 return ret;
703}
704
5f252b32 705static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
1da177e4 706{
1da177e4 707 struct task_struct *p = timer->it.cpu.task;
692117c1 708 u64 now;
1da177e4 709
692117c1
TG
710 if (WARN_ON_ONCE(!p))
711 return;
a3222f88 712
1da177e4
LT
713 /*
714 * Easy part: convert the reload time.
715 */
16118794 716 itp->it_interval = ktime_to_timespec64(timer->it_interval);
1da177e4 717
eabdec04 718 if (!timer->it.cpu.expires)
1da177e4 719 return;
1da177e4 720
1da177e4
LT
721 /*
722 * Sample the clock to take the difference with the expiry time.
723 */
724 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
725 cpu_clock_sample(timer->it_clock, p, &now);
1da177e4 726 } else {
e73d84e3
FW
727 struct sighand_struct *sighand;
728 unsigned long flags;
729
730 /*
731 * Protect against sighand release/switch in exit/exec and
732 * also make timer sampling safe if it ends up calling
ebd7e7fc 733 * thread_group_cputime().
e73d84e3
FW
734 */
735 sighand = lock_task_sighand(p, &flags);
736 if (unlikely(sighand == NULL)) {
1da177e4
LT
737 /*
738 * The process has been reaped.
739 * We can't even collect a sample any more.
740 * Consider the timer disarmed; nothing else to do.
741 */
55ccb616 742 timer->it.cpu.expires = 0;
2c13ce8f 743 return;
1da177e4 744 } else {
3997ad31 745 cpu_timer_sample_group(timer->it_clock, p, &now);
e73d84e3 746 unlock_task_sighand(p, &flags);
1da177e4 747 }
1da177e4
LT
748 }
749
55ccb616 750 if (now < timer->it.cpu.expires) {
5f252b32 751 itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);
1da177e4
LT
752 } else {
753 /*
754 * The timer should have expired already, but the firing
755 * hasn't taken place yet. Say it's just about to expire.
756 */
757 itp->it_value.tv_nsec = 1;
758 itp->it_value.tv_sec = 0;
759 }
760}
761
2473f3e7
FW
762static unsigned long long
763check_timers_list(struct list_head *timers,
764 struct list_head *firing,
765 unsigned long long curr)
766{
767 int maxfire = 20;
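	/*
	 * Bound the work done per invocation: after 20 expired timers give
	 * up and return the expiry of the timer we stopped at, so the rest
	 * are handled on a later tick.
	 */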
768
769 while (!list_empty(timers)) {
770 struct cpu_timer_list *t;
771
772 t = list_first_entry(timers, struct cpu_timer_list, entry);
773
774 if (!--maxfire || curr < t->expires)
775 return t->expires;
776
777 t->firing = 1;
778 list_move_tail(&t->entry, firing);
779 }
780
781 return 0;
782}
783
34be3930
JL
784static inline void check_dl_overrun(struct task_struct *tsk)
785{
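	/*
	 * dl_overrun is set by the deadline scheduler when a SCHED_DEADLINE
	 * task overran its runtime and asked to be notified; report it with
	 * SIGXCPU.
	 */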
786 if (tsk->dl.dl_overrun) {
787 tsk->dl.dl_overrun = 0;
788 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
789 }
790}
791
1da177e4
LT
792/*
793 * Check for any per-thread CPU timers that have fired and move them off
794 * the tsk->cpu_timers[N] list onto the firing list. Here we update the
795 * tsk->cputime_expires values to reflect the remaining thread CPU timers.
796 */
797static void check_thread_timers(struct task_struct *tsk,
798 struct list_head *firing)
799{
800 struct list_head *timers = tsk->cpu_timers;
ebd7e7fc
FW
801 struct task_cputime *tsk_expires = &tsk->cputime_expires;
802 u64 expires;
d4bb5274 803 unsigned long soft;
1da177e4 804
34be3930
JL
805 if (dl_task(tsk))
806 check_dl_overrun(tsk);
807
934715a1
JL
808 /*
809 * If cputime_expires is zero, then there are no active
810 * per thread CPU timers.
811 */
812 if (task_cputime_zero(&tsk->cputime_expires))
813 return;
814
2473f3e7 815 expires = check_timers_list(timers, firing, prof_ticks(tsk));
ebd7e7fc 816 tsk_expires->prof_exp = expires;
1da177e4 817
2473f3e7 818 expires = check_timers_list(++timers, firing, virt_ticks(tsk));
ebd7e7fc 819 tsk_expires->virt_exp = expires;
1da177e4 820
2473f3e7
FW
821 tsk_expires->sched_exp = check_timers_list(++timers, firing,
822 tsk->se.sum_exec_runtime);
78f2c7db
PZ
823
824 /*
825 * Check for the special case thread timers.
826 */
3cf29496 827 soft = task_rlimit(tsk, RLIMIT_RTTIME);
d4bb5274 828 if (soft != RLIM_INFINITY) {
3cf29496 829 unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
78f2c7db 830
5a52dd50
PZ
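		/*
		 * tsk->rt.timeout is accounted in jiffies while RLIMIT_RTTIME
		 * is specified in microseconds, hence the conversions below.
		 */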
831 if (hard != RLIM_INFINITY &&
832 tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
78f2c7db
PZ
833 /*
834 * At the hard limit, we just die.
835 * No need to calculate anything else now.
836 */
43fe8b8e
TG
837 if (print_fatal_signals) {
838 pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
839 tsk->comm, task_pid_nr(tsk));
840 }
78f2c7db
PZ
841 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
842 return;
843 }
d4bb5274 844 if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
78f2c7db
PZ
845 /*
846 * At the soft limit, send a SIGXCPU every second.
847 */
d4bb5274
JS
848 if (soft < hard) {
849 soft += USEC_PER_SEC;
3cf29496
KO
850 tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur =
851 soft;
78f2c7db 852 }
43fe8b8e
TG
853 if (print_fatal_signals) {
854 pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
855 tsk->comm, task_pid_nr(tsk));
856 }
78f2c7db
PZ
857 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
858 }
859 }
b7878300
FW
860 if (task_cputime_zero(tsk_expires))
861 tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
1da177e4
LT
862}
863
1018016c 864static inline void stop_process_timers(struct signal_struct *sig)
3fccfd67 865{
15365c10 866 struct thread_group_cputimer *cputimer = &sig->cputimer;
3fccfd67 867
1018016c 868 /* Turn off cputimer->running. This is done without locking. */
d5c373eb 869 WRITE_ONCE(cputimer->running, false);
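	/*
	 * With no process-wide timer pending, the tick dependency can be
	 * dropped so nohz_full CPUs may stop the tick again.
	 */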
b7878300 870 tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
3fccfd67
PZ
871}
872
42c4ab41 873static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
ebd7e7fc 874 u64 *expires, u64 cur_time, int signo)
42c4ab41 875{
64861634 876 if (!it->expires)
42c4ab41
SG
877 return;
878
858cf3a8
FW
879 if (cur_time >= it->expires) {
880 if (it->incr)
64861634 881 it->expires += it->incr;
858cf3a8 882 else
64861634 883 it->expires = 0;
42c4ab41 884
3f0a525e
XG
885 trace_itimer_expire(signo == SIGPROF ?
886 ITIMER_PROF : ITIMER_VIRTUAL,
6883f81a 887 task_tgid(tsk), cur_time);
42c4ab41
SG
888 __group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
889 }
890
858cf3a8
FW
891 if (it->expires && (!*expires || it->expires < *expires))
892 *expires = it->expires;
42c4ab41
SG
893}
894
1da177e4
LT
895/*
896 * Check for any process-wide (thread group) CPU timers that have fired
897 * and move them off the tsk->signal->cpu_timers list onto the firing
898 * list. Per-thread timers have already been taken off.
899 */
900static void check_process_timers(struct task_struct *tsk,
901 struct list_head *firing)
902{
903 struct signal_struct *const sig = tsk->signal;
ebd7e7fc
FW
904 u64 utime, ptime, virt_expires, prof_expires;
905 u64 sum_sched_runtime, sched_expires;
1da177e4 906 struct list_head *timers = sig->cpu_timers;
ebd7e7fc 907 struct task_cputime cputime;
d4bb5274 908 unsigned long soft;
1da177e4 909
934715a1
JL
910 /*
911 * If cputimer is not running, then there are no active
912 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
913 */
914 if (!READ_ONCE(tsk->signal->cputimer.running))
915 return;
916
a324956f 917 /*
c8d75aa4
JL
918 * Signify that a thread is checking for process timers.
919 * Write access to this field is protected by the sighand lock.
920 */
921 sig->cputimer.checking_timer = true;
922
1da177e4 923 /*
a324956f
TG
924 * Collect the current process totals. Group accounting is active
925 * so the sample can be taken directly.
1da177e4 926 */
a324956f 927 sample_cputime_atomic(&cputime, &sig->cputimer.cputime_atomic);
ebd7e7fc
FW
928 utime = cputime.utime;
929 ptime = utime + cputime.stime;
f06febc9 930 sum_sched_runtime = cputime.sum_exec_runtime;
1da177e4 931
2473f3e7
FW
932 prof_expires = check_timers_list(timers, firing, ptime);
933 virt_expires = check_timers_list(++timers, firing, utime);
934 sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
1da177e4
LT
935
936 /*
937 * Check for the special case process timers.
938 */
42c4ab41
SG
939 check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
940 SIGPROF);
941 check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
942 SIGVTALRM);
3cf29496 943 soft = task_rlimit(tsk, RLIMIT_CPU);
d4bb5274 944 if (soft != RLIM_INFINITY) {
ebd7e7fc 945 unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
3cf29496 946 unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
ebd7e7fc 947 u64 x;
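		/* RLIMIT_CPU is expressed in seconds of combined utime + stime. */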
d4bb5274 948 if (psecs >= hard) {
1da177e4
LT
949 /*
950 * At the hard limit, we just die.
951 * No need to calculate anything else now.
952 */
43fe8b8e
TG
953 if (print_fatal_signals) {
954 pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
955 tsk->comm, task_pid_nr(tsk));
956 }
1da177e4
LT
957 __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
958 return;
959 }
d4bb5274 960 if (psecs >= soft) {
1da177e4
LT
961 /*
962 * At the soft limit, send a SIGXCPU every second.
963 */
43fe8b8e
TG
964 if (print_fatal_signals) {
965 pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
966 tsk->comm, task_pid_nr(tsk));
967 }
1da177e4 968 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
d4bb5274
JS
969 if (soft < hard) {
970 soft++;
971 sig->rlim[RLIMIT_CPU].rlim_cur = soft;
1da177e4
LT
972 }
973 }
ebd7e7fc
FW
974 x = soft * NSEC_PER_SEC;
975 if (!prof_expires || x < prof_expires)
1da177e4 976 prof_expires = x;
1da177e4
LT
977 }
978
ebd7e7fc
FW
979 sig->cputime_expires.prof_exp = prof_expires;
980 sig->cputime_expires.virt_exp = virt_expires;
29f87b79
SG
981 sig->cputime_expires.sched_exp = sched_expires;
982 if (task_cputime_zero(&sig->cputime_expires))
983 stop_process_timers(sig);
c8d75aa4
JL
984
985 sig->cputimer.checking_timer = false;
1da177e4
LT
986}
987
988/*
96fe3b07 989 * This is called from the signal code (via posixtimer_rearm)
1da177e4
LT
990 * when the last timer signal was delivered and we have to reload the timer.
991 */
f37fb0aa 992static void posix_cpu_timer_rearm(struct k_itimer *timer)
1da177e4 993{
692117c1 994 struct task_struct *p = timer->it.cpu.task;
e73d84e3
FW
995 struct sighand_struct *sighand;
996 unsigned long flags;
ebd7e7fc 997 u64 now;
1da177e4 998
692117c1
TG
999 if (WARN_ON_ONCE(!p))
1000 return;
1da177e4
LT
1001
1002 /*
1003 * Fetch the current sample and update the timer's expiry time.
1004 */
1005 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
1006 cpu_clock_sample(timer->it_clock, p, &now);
1007 bump_cpu_timer(timer, now);
724a3713 1008 if (unlikely(p->exit_state))
af888d67 1009 return;
724a3713 1010
e73d84e3
FW
1011 /* Protect timer list r/w in arm_timer() */
1012 sighand = lock_task_sighand(p, &flags);
1013 if (!sighand)
af888d67 1014 return;
1da177e4 1015 } else {
e73d84e3
FW
1016 /*
1017 * Protect arm_timer() and timer sampling in case of call to
ebd7e7fc 1018 * thread_group_cputime().
e73d84e3
FW
1019 */
1020 sighand = lock_task_sighand(p, &flags);
1021 if (unlikely(sighand == NULL)) {
1da177e4
LT
1022 /*
1023 * The process has been reaped.
1024 * We can't even collect a sample any more.
1025 */
55ccb616 1026 timer->it.cpu.expires = 0;
af888d67 1027 return;
1da177e4 1028 } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
af888d67
TG
1029 /* If the process is dying, no need to rearm */
1030 goto unlock;
1da177e4 1031 }
3997ad31 1032 cpu_timer_sample_group(timer->it_clock, p, &now);
1da177e4 1033 bump_cpu_timer(timer, now);
e73d84e3 1034 /* Leave the sighand locked for the call below. */
1da177e4
LT
1035 }
1036
1037 /*
1038 * Now re-arm for the new expiry time.
1039 */
5eb9aa64 1040 arm_timer(timer);
af888d67 1041unlock:
e73d84e3 1042 unlock_task_sighand(p, &flags);
1da177e4
LT
1043}
1044
f06febc9
FM
1045/**
1046 * task_cputime_expired - Compare two task_cputime entities.
1047 *
1048 * @sample: The task_cputime structure to be checked for expiration.
1049 * @expires: Expiration times, against which @sample will be checked.
1050 *
1051 * Checks @sample against @expires to see if any field of @sample has expired.
1052 * Returns true if any field of the former is greater than or equal to the
1053 * corresponding field of the latter, provided that latter field is set. Otherwise returns false.
1054 */
ebd7e7fc
FW
1055static inline int task_cputime_expired(const struct task_cputime *sample,
1056 const struct task_cputime *expires)
f06febc9 1057{
64861634 1058 if (expires->utime && sample->utime >= expires->utime)
f06febc9 1059 return 1;
64861634 1060 if (expires->stime && sample->utime + sample->stime >= expires->stime)
f06febc9
FM
1061 return 1;
1062 if (expires->sum_exec_runtime != 0 &&
1063 sample->sum_exec_runtime >= expires->sum_exec_runtime)
1064 return 1;
1065 return 0;
1066}
1067
1068/**
1069 * fastpath_timer_check - POSIX CPU timers fast path.
1070 *
1071 * @tsk: The task (thread) being checked.
f06febc9 1072 *
bb34d92f
FM
1073 * Check the task and thread group timers. If both are zero (there are no
1074 * timers set) return false. Otherwise snapshot the task and thread group
1075 * timers and compare them with the corresponding expiration times. Return
1076 * true if a timer has expired, else return false.
f06febc9 1077 */
bb34d92f 1078static inline int fastpath_timer_check(struct task_struct *tsk)
f06febc9 1079{
ad133ba3 1080 struct signal_struct *sig;
bb34d92f 1081
bb34d92f 1082 if (!task_cputime_zero(&tsk->cputime_expires)) {
ebd7e7fc 1083 struct task_cputime task_sample;
bb34d92f 1084
ebd7e7fc 1085 task_cputime(tsk, &task_sample.utime, &task_sample.stime);
7c177d99 1086 task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
bb34d92f
FM
1087 if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
1088 return 1;
1089 }
ad133ba3
ON
1090
1091 sig = tsk->signal;
c8d75aa4
JL
1092 /*
1093 * Check if thread group timers expired when the cputimer is
1094 * running and no other thread in the group is already checking
1095 * for thread group cputimers. These fields are read without the
1096 * sighand lock. However, this is fine because this is meant to
1097 * be a fastpath heuristic to determine whether we should try to
1098 * acquire the sighand lock to check/handle timers.
1099 *
1100 * In the worst case scenario, if 'running' or 'checking_timer' gets
1101 * set but the current thread doesn't see the change yet, we'll wait
1102 * until the next thread in the group gets a scheduler interrupt to
1103 * handle the timer. This isn't an issue in practice because these
1104 * types of delays with signals actually getting sent are expected.
1105 */
1106 if (READ_ONCE(sig->cputimer.running) &&
1107 !READ_ONCE(sig->cputimer.checking_timer)) {
ebd7e7fc 1108 struct task_cputime group_sample;
bb34d92f 1109
71107445 1110 sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
8d1f431c 1111
bb34d92f
FM
1112 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
1113 return 1;
1114 }
37bebc70 1115
34be3930
JL
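	/*
	 * A pending SCHED_DEADLINE runtime overrun also requires the slow
	 * path, so the SIGXCPU notification gets sent.
	 */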
1116 if (dl_task(tsk) && tsk->dl.dl_overrun)
1117 return 1;
1118
f55db609 1119 return 0;
f06febc9
FM
1120}
1121
1da177e4
LT
1122/*
1123 * This is called from the timer interrupt handler. The irq handler has
1124 * already updated our counts. We need to check if any timers fire now.
1125 * Interrupts are disabled.
1126 */
dce3e8fd 1127void run_posix_cpu_timers(void)
1da177e4 1128{
dce3e8fd 1129 struct task_struct *tsk = current;
1da177e4 1130 struct k_itimer *timer, *next;
0bdd2ed4 1131 unsigned long flags;
dce3e8fd 1132 LIST_HEAD(firing);
1da177e4 1133
a6968220 1134 lockdep_assert_irqs_disabled();
1da177e4 1135
1da177e4 1136 /*
f06febc9 1137 * The fast path checks that there are no expired thread or thread
bb34d92f 1138 * group timers. If that's so, just return.
1da177e4 1139 */
bb34d92f 1140 if (!fastpath_timer_check(tsk))
f06febc9 1141 return;
5ce73a4a 1142
0bdd2ed4
ON
1143 if (!lock_task_sighand(tsk, &flags))
1144 return;
bb34d92f
FM
1145 /*
1146 * Here we take off tsk->signal->cpu_timers[N] and
1147 * tsk->cpu_timers[N] all the timers that are firing, and
1148 * put them on the firing list.
1149 */
1150 check_thread_timers(tsk, &firing);
934715a1
JL
1151
1152 check_process_timers(tsk, &firing);
1da177e4 1153
bb34d92f
FM
1154 /*
1155 * We must release these locks before taking any timer's lock.
1156 * There is a potential race with timer deletion here, as the
1157 * siglock now protects our private firing list. We have set
1158 * the firing flag in each timer, so that a deletion attempt
1159 * that gets the timer lock before we do will give it up and
1160 * spin until we've taken care of that timer below.
1161 */
0bdd2ed4 1162 unlock_task_sighand(tsk, &flags);
1da177e4
LT
1163
1164 /*
1165 * Now that all the timers on our list have the firing flag,
25985edc 1166 * no one will touch their list entries but us. We'll take
1da177e4
LT
1167 * each timer's lock before clearing its firing flag, so no
1168 * timer call will interfere.
1169 */
1170 list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
6e85c5ba
HS
1171 int cpu_firing;
1172
1da177e4
LT
1173 spin_lock(&timer->it_lock);
1174 list_del_init(&timer->it.cpu.entry);
6e85c5ba 1175 cpu_firing = timer->it.cpu.firing;
1da177e4
LT
1176 timer->it.cpu.firing = 0;
1177 /*
1178 * The firing flag is -1 if we collided with a reset
1179 * of the timer, which already reported this
1180 * almost-firing as an overrun. So don't generate an event.
1181 */
6e85c5ba 1182 if (likely(cpu_firing >= 0))
1da177e4 1183 cpu_timer_fire(timer);
1da177e4
LT
1184 spin_unlock(&timer->it_lock);
1185 }
1186}
1187
1188/*
f55db609 1189 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
f06febc9 1190 * The tsk->sighand->siglock must be held by the caller.
1da177e4
LT
1191 */
1192void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
858cf3a8 1193 u64 *newval, u64 *oldval)
1da177e4 1194{
858cf3a8 1195 u64 now;
c3bca5d4 1196 int ret;
1da177e4 1197
692117c1
TG
1198 if (WARN_ON_ONCE(clock_idx >= CPUCLOCK_SCHED))
1199 return;
1200
c3bca5d4 1201 ret = cpu_timer_sample_group(clock_idx, tsk, &now);
1da177e4 1202
c3bca5d4 1203 if (oldval && ret != -EINVAL) {
f55db609
SG
1204 /*
1205 * We are setting itimer. The *oldval is absolute and we update
1206 * it to be relative, *newval argument is relative and we update
1207 * it to be absolute.
1208 */
64861634 1209 if (*oldval) {
858cf3a8 1210 if (*oldval <= now) {
1da177e4 1211 /* Just about to fire. */
858cf3a8 1212 *oldval = TICK_NSEC;
1da177e4 1213 } else {
858cf3a8 1214 *oldval -= now;
1da177e4
LT
1215 }
1216 }
1217
64861634 1218 if (!*newval)
b7878300 1219 return;
858cf3a8 1220 *newval += now;
1da177e4
LT
1221 }
1222
1223 /*
f55db609
SG
1224 * Update expiration cache if we are the earliest timer, or eventually
1225 * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
1da177e4 1226 */
f55db609
SG
1227 switch (clock_idx) {
1228 case CPUCLOCK_PROF:
858cf3a8
FW
1229 if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1230 tsk->signal->cputime_expires.prof_exp = *newval;
f55db609
SG
1231 break;
1232 case CPUCLOCK_VIRT:
858cf3a8
FW
1233 if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1234 tsk->signal->cputime_expires.virt_exp = *newval;
f55db609 1235 break;
1da177e4 1236 }
b7878300
FW
1237
1238 tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
1da177e4
LT
1239}
1240
e4b76555 1241static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
343d8fc2 1242 const struct timespec64 *rqtp)
1da177e4 1243{
86a9c446 1244 struct itimerspec64 it;
343d8fc2
TG
1245 struct k_itimer timer;
1246 u64 expires;
1da177e4
LT
1247 int error;
1248
1da177e4
LT
1249 /*
1250 * Set up a temporary timer and then wait for it to go off.
1251 */
1252 memset(&timer, 0, sizeof timer);
1253 spin_lock_init(&timer.it_lock);
1254 timer.it_clock = which_clock;
1255 timer.it_overrun = -1;
1256 error = posix_cpu_timer_create(&timer);
1257 timer.it_process = current;
1258 if (!error) {
5f252b32 1259 static struct itimerspec64 zero_it;
edbeda46 1260 struct restart_block *restart;
e4b76555 1261
edbeda46 1262 memset(&it, 0, sizeof(it));
86a9c446 1263 it.it_value = *rqtp;
1da177e4
LT
1264
1265 spin_lock_irq(&timer.it_lock);
86a9c446 1266 error = posix_cpu_timer_set(&timer, flags, &it, NULL);
1da177e4
LT
1267 if (error) {
1268 spin_unlock_irq(&timer.it_lock);
1269 return error;
1270 }
1271
1272 while (!signal_pending(current)) {
55ccb616 1273 if (timer.it.cpu.expires == 0) {
1da177e4 1274 /*
e6c42c29
SG
1275 * Our timer fired and was reset; the
1276 * deletion below cannot fail.
1da177e4 1277 */
e6c42c29 1278 posix_cpu_timer_del(&timer);
1da177e4
LT
1279 spin_unlock_irq(&timer.it_lock);
1280 return 0;
1281 }
1282
1283 /*
1284 * Block until cpu_timer_fire (or a signal) wakes us.
1285 */
1286 __set_current_state(TASK_INTERRUPTIBLE);
1287 spin_unlock_irq(&timer.it_lock);
1288 schedule();
1289 spin_lock_irq(&timer.it_lock);
1290 }
1291
1292 /*
1293 * We were interrupted by a signal.
1294 */
343d8fc2 1295 expires = timer.it.cpu.expires;
86a9c446 1296 error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
e6c42c29
SG
1297 if (!error) {
1298 /*
1299 * Timer is now unarmed, deletion cannot fail.
1300 */
1301 posix_cpu_timer_del(&timer);
1302 }
1da177e4
LT
1303 spin_unlock_irq(&timer.it_lock);
1304
e6c42c29
SG
1305 while (error == TIMER_RETRY) {
1306 /*
1307 * We need to handle case when timer was or is in the
1308 * middle of firing. In other cases we already freed
1309 * resources.
1310 */
1311 spin_lock_irq(&timer.it_lock);
1312 error = posix_cpu_timer_del(&timer);
1313 spin_unlock_irq(&timer.it_lock);
1314 }
1315
86a9c446 1316 if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
1da177e4
LT
1317 /*
1318 * It actually did fire already.
1319 */
1320 return 0;
1321 }
1322
e4b76555 1323 error = -ERESTART_RESTARTBLOCK;
86a9c446
AV
1324 /*
1325 * Report back to the user the time still remaining.
1326 */
edbeda46 1327 restart = &current->restart_block;
343d8fc2 1328 restart->nanosleep.expires = expires;
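		/*
		 * Copy the remaining time out to user space, unless the
		 * caller supplied no rmtp pointer (TT_NONE).
		 */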
c0edd7c9
DD
1329 if (restart->nanosleep.type != TT_NONE)
1330 error = nanosleep_copyout(restart, &it.it_value);
e4b76555
TA
1331 }
1332
1333 return error;
1334}
1335
bc2c8ea4
TG
1336static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1337
1338static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
938e7cf2 1339 const struct timespec64 *rqtp)
e4b76555 1340{
f56141e3 1341 struct restart_block *restart_block = &current->restart_block;
e4b76555
TA
1342 int error;
1343
1344 /*
1345 * Diagnose required errors first: a thread may not sleep on its own CPU clock, which cannot advance while it sleeps.
1346 */
1347 if (CPUCLOCK_PERTHREAD(which_clock) &&
1348 (CPUCLOCK_PID(which_clock) == 0 ||
01a21974 1349 CPUCLOCK_PID(which_clock) == task_pid_vnr(current)))
e4b76555
TA
1350 return -EINVAL;
1351
86a9c446 1352 error = do_cpu_nanosleep(which_clock, flags, rqtp);
e4b76555
TA
1353
1354 if (error == -ERESTART_RESTARTBLOCK) {
1355
3751f9f2 1356 if (flags & TIMER_ABSTIME)
e4b76555 1357 return -ERESTARTNOHAND;
1da177e4 1358
1711ef38 1359 restart_block->fn = posix_cpu_nsleep_restart;
ab8177bc 1360 restart_block->nanosleep.clockid = which_clock;
1da177e4 1361 }
1da177e4
LT
1362 return error;
1363}
1364
bc2c8ea4 1365static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1da177e4 1366{
ab8177bc 1367 clockid_t which_clock = restart_block->nanosleep.clockid;
ad196384 1368 struct timespec64 t;
97735f25 1369
ad196384 1370 t = ns_to_timespec64(restart_block->nanosleep.expires);
97735f25 1371
86a9c446 1372 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t);
1da177e4
LT
1373}
1374
29f1b2b0
ND
1375#define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED)
1376#define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED)
1da177e4 1377
a924b04d 1378static int process_cpu_clock_getres(const clockid_t which_clock,
d2e3e0ca 1379 struct timespec64 *tp)
1da177e4
LT
1380{
1381 return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1382}
a924b04d 1383static int process_cpu_clock_get(const clockid_t which_clock,
3c9c12f4 1384 struct timespec64 *tp)
1da177e4
LT
1385{
1386 return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1387}
1388static int process_cpu_timer_create(struct k_itimer *timer)
1389{
1390 timer->it_clock = PROCESS_CLOCK;
1391 return posix_cpu_timer_create(timer);
1392}
a924b04d 1393static int process_cpu_nsleep(const clockid_t which_clock, int flags,
938e7cf2 1394 const struct timespec64 *rqtp)
1da177e4 1395{
99e6c0e6 1396 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
1da177e4 1397}
a924b04d 1398static int thread_cpu_clock_getres(const clockid_t which_clock,
d2e3e0ca 1399 struct timespec64 *tp)
1da177e4
LT
1400{
1401 return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1402}
a924b04d 1403static int thread_cpu_clock_get(const clockid_t which_clock,
3c9c12f4 1404 struct timespec64 *tp)
1da177e4
LT
1405{
1406 return posix_cpu_clock_get(THREAD_CLOCK, tp);
1407}
1408static int thread_cpu_timer_create(struct k_itimer *timer)
1409{
1410 timer->it_clock = THREAD_CLOCK;
1411 return posix_cpu_timer_create(timer);
1412}
1da177e4 1413
d3ba5a9a 1414const struct k_clock clock_posix_cpu = {
1976945e
TG
1415 .clock_getres = posix_cpu_clock_getres,
1416 .clock_set = posix_cpu_clock_set,
1417 .clock_get = posix_cpu_clock_get,
1418 .timer_create = posix_cpu_timer_create,
1419 .nsleep = posix_cpu_nsleep,
1976945e
TG
1420 .timer_set = posix_cpu_timer_set,
1421 .timer_del = posix_cpu_timer_del,
1422 .timer_get = posix_cpu_timer_get,
f37fb0aa 1423 .timer_rearm = posix_cpu_timer_rearm,
1976945e
TG
1424};
1425
d3ba5a9a
CH
1426const struct k_clock clock_process = {
1427 .clock_getres = process_cpu_clock_getres,
1428 .clock_get = process_cpu_clock_get,
1429 .timer_create = process_cpu_timer_create,
1430 .nsleep = process_cpu_nsleep,
d3ba5a9a 1431};
1da177e4 1432
d3ba5a9a
CH
1433const struct k_clock clock_thread = {
1434 .clock_getres = thread_cpu_clock_getres,
1435 .clock_get = thread_cpu_clock_get,
1436 .timer_create = thread_cpu_timer_create,
1437};