Merge tag 'fbdev-for-6.4-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...
[linux-block.git] / include / linux / psi_types.h
CommitLineData
2fb75e1b 1/* SPDX-License-Identifier: GPL-2.0 */
eb414681
JW
2#ifndef _LINUX_PSI_TYPES_H
3#define _LINUX_PSI_TYPES_H
4
0e94682b 5#include <linux/kthread.h>
eb414681
JW
6#include <linux/seqlock.h>
7#include <linux/types.h>
0e94682b
SB
8#include <linux/kref.h>
9#include <linux/wait.h>
eb414681
JW
10
11#ifdef CONFIG_PSI
12
/*
 * Tracked task states.
 *
 * Each enumerator doubles as the shift count of the matching TSK_* bitmask,
 * and NR_PSI_TASK_COUNTS sizes the per-CPU tasks[] counter array.
 */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * For IO and CPU stalls the presence of running/oncpu tasks
	 * in the domain means a partial rather than a full stall.
	 * For memory it's not so simple because of page reclaimers:
	 * they are running/oncpu while representing a stall. To tell
	 * whether a domain has productivity left or not, we need to
	 * distinguish between regular running (i.e. productive)
	 * threads and memstall ones.
	 */
	NR_MEMSTALL_RUNNING,
	/* Number of counted states above; pinned explicitly to 4 */
	NR_PSI_TASK_COUNTS = 4,
};
30
/*
 * Task state bitmasks.
 *
 * One bit per counted state, shifted by the corresponding
 * enum psi_task_count value.
 */
#define TSK_IOWAIT		(1 << NR_IOWAIT)
#define TSK_MEMSTALL		(1 << NR_MEMSTALL)
#define TSK_RUNNING		(1 << NR_RUNNING)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)

/*
 * Only one task can be scheduled, no corresponding task count.
 * The bit sits directly above the counted states.
 */
#define TSK_ONCPU		(1 << NR_PSI_TASK_COUNTS)
39
eb414681
JW
/*
 * Resources that workloads could be stalled on.
 *
 * PSI_IRQ only exists when CONFIG_IRQ_TIME_ACCOUNTING is enabled, so
 * NR_PSI_RESOURCES is 3 or 4 depending on the configuration.
 */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ,
#endif
	NR_PSI_RESOURCES,
};
50
/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 *
 * IRQ pressure, present only with CONFIG_IRQ_TIME_ACCOUNTING, has a FULL
 * state but no SOME state. NR_PSI_STATES sizes the per-state arrays and
 * is the shift base for the extra state-mask flags defined after this enum.
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ_FULL,
#endif
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	NR_PSI_STATES,
};
71
71dbdde7
JW
/*
 * Use one bit in the state mask to track TSK_ONCPU.
 * It is the first bit above the real enum psi_states values.
 */
#define PSI_ONCPU		(1 << NR_PSI_STATES)

/* Flag whether to re-arm avgs_work, see details in get_recent_times() */
#define PSI_STATE_RESCHEDULE	(1 << (NR_PSI_STATES + 1))
77
0e94682b
SB
/*
 * Aggregators that consume the sampled stall times.
 *
 * Used as the first index of psi_group_cpu.times_prev[] and
 * psi_group.total[], and as the trigger type in struct psi_trigger.
 */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};
83
eb414681
JW
84struct psi_group_cpu {
85 /* 1st cacheline updated by the scheduler */
86
87 /* Aggregator needs to know of concurrent changes */
88 seqcount_t seq ____cacheline_aligned_in_smp;
89
90 /* States of the tasks belonging to this group */
91 unsigned int tasks[NR_PSI_TASK_COUNTS];
92
33b2d630
SB
93 /* Aggregate pressure state derived from the tasks */
94 u32 state_mask;
95
eb414681
JW
96 /* Period time sampling buckets for each state of interest (ns) */
97 u32 times[NR_PSI_STATES];
98
99 /* Time of last task change in this group (rq_clock) */
100 u64 state_start;
101
102 /* 2nd cacheline updated by the aggregator */
103
104 /* Delta detection against the sampling buckets */
0e94682b
SB
105 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
106 ____cacheline_aligned_in_smp;
107};
108
109/* PSI growth tracking window */
110struct psi_window {
111 /* Window size in ns */
112 u64 size;
113
114 /* Start time of the current window in ns */
115 u64 start_time;
116
117 /* Value at the start of the window */
118 u64 start_value;
119
120 /* Value growth in the previous window */
121 u64 prev_growth;
122};
123
124struct psi_trigger {
125 /* PSI state being monitored by the trigger */
126 enum psi_states state;
127
128 /* User-spacified threshold in ns */
129 u64 threshold;
130
131 /* List node inside triggers list */
132 struct list_head node;
133
134 /* Backpointer needed during trigger destruction */
135 struct psi_group *group;
136
137 /* Wait queue for polling */
138 wait_queue_head_t event_wait;
139
140 /* Pending event flag */
141 int event;
142
143 /* Tracking window */
144 struct psi_window win;
145
146 /*
147 * Time last event was generated. Used for rate-limiting
148 * events to one per window
149 */
150 u64 last_event_time;
151
e6df4ead
ZH
152 /* Deferred event(s) from previous ratelimit window */
153 bool pending_event;
d82caa27
DC
154
155 /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
156 enum psi_aggregators aggregator;
eb414681
JW
157};
158
159struct psi_group {
dc86aba7 160 struct psi_group *parent;
34f26a15 161 bool enabled;
dc86aba7 162
bcc78db6
SB
163 /* Protects data used by the aggregator */
164 struct mutex avgs_lock;
eb414681
JW
165
166 /* Per-cpu task state & time tracking */
167 struct psi_group_cpu __percpu *pcpu;
168
bcc78db6
SB
169 /* Running pressure averages */
170 u64 avg_total[NR_PSI_STATES - 1];
171 u64 avg_last_update;
172 u64 avg_next_update;
0e94682b
SB
173
174 /* Aggregator work control */
bcc78db6 175 struct delayed_work avgs_work;
eb414681 176
d82caa27
DC
177 /* Unprivileged triggers against N*PSI_FREQ windows */
178 struct list_head avg_triggers;
179 u32 avg_nr_triggers[NR_PSI_STATES - 1];
180
eb414681 181 /* Total stall times and sampled pressure averages */
0e94682b 182 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
eb414681 183 unsigned long avg[NR_PSI_STATES - 1][3];
0e94682b 184
65457b74
DC
185 /* Monitor RT polling work control */
186 struct task_struct __rcu *rtpoll_task;
187 struct timer_list rtpoll_timer;
188 wait_queue_head_t rtpoll_wait;
189 atomic_t rtpoll_wakeup;
190 atomic_t rtpoll_scheduled;
0e94682b
SB
191
192 /* Protects data used by the monitor */
65457b74
DC
193 struct mutex rtpoll_trigger_lock;
194
195 /* Configured RT polling triggers */
196 struct list_head rtpoll_triggers;
197 u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
198 u32 rtpoll_states;
199 u64 rtpoll_min_period;
200
201 /* Total stall times at the start of RT polling monitor activation */
202 u64 rtpoll_total[NR_PSI_STATES - 1];
203 u64 rtpoll_next_update;
204 u64 rtpoll_until;
eb414681
JW
205};
206
207#else /* CONFIG_PSI */
208
34f26a15
CZ
/*
 * CONFIG_PSI disabled: no pressure resources exist, and psi_group is an
 * empty placeholder so code embedding it still compiles.
 */
#define NR_PSI_RESOURCES	0

struct psi_group { };
212
213#endif /* CONFIG_PSI */
214
215#endif /* _LINUX_PSI_TYPES_H */