Commit | Line | Data |
---|---|---|
2fb75e1b | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
eb414681 JW |
2 | #ifndef _LINUX_PSI_TYPES_H |
3 | #define _LINUX_PSI_TYPES_H | |
4 | ||
0e94682b | 5 | #include <linux/kthread.h> |
eb414681 JW |
6 | #include <linux/seqlock.h> |
7 | #include <linux/types.h> | |
0e94682b SB |
8 | #include <linux/kref.h> |
9 | #include <linux/wait.h> | |
eb414681 JW |
10 | |
11 | #ifdef CONFIG_PSI | |
12 | ||
/*
 * Tracked task states.
 *
 * Every enumerator below NR_PSI_TASK_COUNTS is the bit position of the
 * matching TSK_* bitmask; NR_PSI_TASK_COUNTS itself sizes the per-CPU
 * psi_group_cpu::tasks[] array.
 */
enum psi_task_count {
	NR_IOWAIT,
	NR_MEMSTALL,
	NR_RUNNING,
	/*
	 * For IO and CPU stalls the presence of running/oncpu tasks
	 * in the domain means a partial rather than a full stall.
	 * For memory it's not so simple because of page reclaimers:
	 * they are running/oncpu while representing a stall. To tell
	 * whether a domain has productivity left or not, we need to
	 * distinguish between regular running (i.e. productive)
	 * threads and memstall ones.
	 */
	NR_MEMSTALL_RUNNING,
	/* Number of task states listed above; keep in sync when adding one */
	NR_PSI_TASK_COUNTS = 4,
};
30 | ||
/* Task state bitmasks, one bit per psi_task_count enumerator */
#define TSK_IOWAIT		(1 << NR_IOWAIT)
#define TSK_MEMSTALL		(1 << NR_MEMSTALL)
#define TSK_RUNNING		(1 << NR_RUNNING)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)

/*
 * Only one task can be scheduled, no corresponding task count.
 * The flag therefore takes the first bit above the counted states.
 */
#define TSK_ONCPU		(1 << NR_PSI_TASK_COUNTS)
39 | ||
eb414681 JW |
/*
 * Resources that workloads could be stalled on.
 *
 * PSI_IRQ only exists when CONFIG_IRQ_TIME_ACCOUNTING is set, so the
 * value of NR_PSI_RESOURCES depends on that option.
 */
enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ,
#endif
	NR_PSI_RESOURCES,
};
50 | ||
/*
 * Pressure states for each resource:
 *
 * SOME: Stalled tasks & working tasks
 * FULL: Stalled tasks & no working tasks
 *
 * IRQ pressure has a FULL state only, and only when
 * CONFIG_IRQ_TIME_ACCOUNTING is set.
 */
enum psi_states {
	PSI_IO_SOME,
	PSI_IO_FULL,
	PSI_MEM_SOME,
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ_FULL,
#endif
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	NR_PSI_STATES,
};
71 | ||
71dbdde7 JW |
/*
 * Use one bit in the state mask to track TSK_ONCPU.
 * It is the first bit above the (1 << state) range of enum psi_states.
 */
#define PSI_ONCPU		(1 << NR_PSI_STATES)

/*
 * Flag whether to re-arm avgs_work, see details in get_recent_times().
 * Takes the bit directly above PSI_ONCPU.
 */
#define PSI_STATE_RESCHEDULE	(1 << (NR_PSI_STATES + 1))
77 | ||
0e94682b SB |
/*
 * Aggregators that consume the per-CPU sampling buckets.
 *
 * NR_PSI_AGGREGATORS sizes the first dimension of
 * psi_group_cpu::times_prev[] and psi_group::total[]; psi_trigger records
 * which aggregator serves it.
 */
enum psi_aggregators {
	PSI_AVGS = 0,
	PSI_POLL,
	NR_PSI_AGGREGATORS,
};
83 | ||
eb414681 JW |
84 | struct psi_group_cpu { |
85 | /* 1st cacheline updated by the scheduler */ | |
86 | ||
87 | /* Aggregator needs to know of concurrent changes */ | |
88 | seqcount_t seq ____cacheline_aligned_in_smp; | |
89 | ||
90 | /* States of the tasks belonging to this group */ | |
91 | unsigned int tasks[NR_PSI_TASK_COUNTS]; | |
92 | ||
33b2d630 SB |
93 | /* Aggregate pressure state derived from the tasks */ |
94 | u32 state_mask; | |
95 | ||
eb414681 JW |
96 | /* Period time sampling buckets for each state of interest (ns) */ |
97 | u32 times[NR_PSI_STATES]; | |
98 | ||
99 | /* Time of last task change in this group (rq_clock) */ | |
100 | u64 state_start; | |
101 | ||
102 | /* 2nd cacheline updated by the aggregator */ | |
103 | ||
104 | /* Delta detection against the sampling buckets */ | |
0e94682b SB |
105 | u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES] |
106 | ____cacheline_aligned_in_smp; | |
107 | }; | |
108 | ||
109 | /* PSI growth tracking window */ | |
110 | struct psi_window { | |
111 | /* Window size in ns */ | |
112 | u64 size; | |
113 | ||
114 | /* Start time of the current window in ns */ | |
115 | u64 start_time; | |
116 | ||
117 | /* Value at the start of the window */ | |
118 | u64 start_value; | |
119 | ||
120 | /* Value growth in the previous window */ | |
121 | u64 prev_growth; | |
122 | }; | |
123 | ||
124 | struct psi_trigger { | |
125 | /* PSI state being monitored by the trigger */ | |
126 | enum psi_states state; | |
127 | ||
128 | /* User-spacified threshold in ns */ | |
129 | u64 threshold; | |
130 | ||
131 | /* List node inside triggers list */ | |
132 | struct list_head node; | |
133 | ||
134 | /* Backpointer needed during trigger destruction */ | |
135 | struct psi_group *group; | |
136 | ||
137 | /* Wait queue for polling */ | |
138 | wait_queue_head_t event_wait; | |
139 | ||
140 | /* Pending event flag */ | |
141 | int event; | |
142 | ||
143 | /* Tracking window */ | |
144 | struct psi_window win; | |
145 | ||
146 | /* | |
147 | * Time last event was generated. Used for rate-limiting | |
148 | * events to one per window | |
149 | */ | |
150 | u64 last_event_time; | |
151 | ||
e6df4ead ZH |
152 | /* Deferred event(s) from previous ratelimit window */ |
153 | bool pending_event; | |
d82caa27 DC |
154 | |
155 | /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */ | |
156 | enum psi_aggregators aggregator; | |
eb414681 JW |
157 | }; |
158 | ||
159 | struct psi_group { | |
dc86aba7 | 160 | struct psi_group *parent; |
34f26a15 | 161 | bool enabled; |
dc86aba7 | 162 | |
bcc78db6 SB |
163 | /* Protects data used by the aggregator */ |
164 | struct mutex avgs_lock; | |
eb414681 JW |
165 | |
166 | /* Per-cpu task state & time tracking */ | |
167 | struct psi_group_cpu __percpu *pcpu; | |
168 | ||
bcc78db6 SB |
169 | /* Running pressure averages */ |
170 | u64 avg_total[NR_PSI_STATES - 1]; | |
171 | u64 avg_last_update; | |
172 | u64 avg_next_update; | |
0e94682b SB |
173 | |
174 | /* Aggregator work control */ | |
bcc78db6 | 175 | struct delayed_work avgs_work; |
eb414681 | 176 | |
d82caa27 DC |
177 | /* Unprivileged triggers against N*PSI_FREQ windows */ |
178 | struct list_head avg_triggers; | |
179 | u32 avg_nr_triggers[NR_PSI_STATES - 1]; | |
180 | ||
eb414681 | 181 | /* Total stall times and sampled pressure averages */ |
0e94682b | 182 | u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1]; |
eb414681 | 183 | unsigned long avg[NR_PSI_STATES - 1][3]; |
0e94682b | 184 | |
65457b74 DC |
185 | /* Monitor RT polling work control */ |
186 | struct task_struct __rcu *rtpoll_task; | |
187 | struct timer_list rtpoll_timer; | |
188 | wait_queue_head_t rtpoll_wait; | |
189 | atomic_t rtpoll_wakeup; | |
190 | atomic_t rtpoll_scheduled; | |
0e94682b SB |
191 | |
192 | /* Protects data used by the monitor */ | |
65457b74 DC |
193 | struct mutex rtpoll_trigger_lock; |
194 | ||
195 | /* Configured RT polling triggers */ | |
196 | struct list_head rtpoll_triggers; | |
197 | u32 rtpoll_nr_triggers[NR_PSI_STATES - 1]; | |
198 | u32 rtpoll_states; | |
199 | u64 rtpoll_min_period; | |
200 | ||
201 | /* Total stall times at the start of RT polling monitor activation */ | |
202 | u64 rtpoll_total[NR_PSI_STATES - 1]; | |
203 | u64 rtpoll_next_update; | |
204 | u64 rtpoll_until; | |
eb414681 JW |
205 | }; |
206 | ||
207 | #else /* CONFIG_PSI */ | |
208 | ||
34f26a15 CZ |
/* No resources are tracked when PSI is compiled out */
#define NR_PSI_RESOURCES	0

/* Empty placeholder so struct psi_group can still be named without PSI */
struct psi_group { };
212 | ||
213 | #endif /* CONFIG_PSI */ | |
214 | ||
215 | #endif /* _LINUX_PSI_TYPES_H */ |