Commit | Line | Data |
---|---|---|
c5350777 LY |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include <linux/version.h> | |
4 | #include <linux/ptrace.h> | |
5 | #include <uapi/linux/bpf.h> | |
7cf245a3 | 6 | #include <bpf/bpf_helpers.h> |
c5350777 LY |
7 | |
8 | /* | |
9 | * The CPU number, cstate number and pstate number are based | |
10 | * on 96boards Hikey with octa CA53 CPUs. | |
11 | * | |
12 | * Every CPU has three idle states for cstate: | |
13 | * WFI, CPU_OFF, CLUSTER_OFF | |
14 | * | |
15 | * Every CPU has 5 operating points: | |
16 | * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz | |
17 | * | |
18 | * This code is based on these assumptions, and other platforms | |
19 | * need to adjust these definitions. | |
20 | */ | |
#define MAX_CPU			8	/* CPUs covered by the per-CPU maps */
#define MAX_PSTATE_ENTRIES	5	/* operating points per CPU */
#define MAX_CSTATE_ENTRIES	3	/* idle states per CPU */

/*
 * Operating points, lowest first.  The values are the MHz figures from
 * the platform description scaled by 1000.
 */
static int cpu_opps[] = {
	208000,
	432000,
	729000,
	960000,
	1200000,
};
26 | ||
27 | /* | |
28 | * my_map structure is used to record cstate and pstate index and | |
29 | * timestamp (Idx, Ts), when new event incoming we need to update | |
30 | * combination for new state index and timestamp (Idx`, Ts`). | |
31 | * | |
32 | * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time | |
33 | * interval for the previous state: Duration(Idx) = Ts` - Ts. | |
34 | * | |
35 | * Every CPU has one below array for recording state index and | |
36 | * timestamp, and record for cstate and pstate separately: | |
37 | * | |
38 | * +--------------------------+ | |
39 | * | cstate timestamp | | |
40 | * +--------------------------+ | |
41 | * | cstate index | | |
42 | * +--------------------------+ | |
43 | * | pstate timestamp | | |
44 | * +--------------------------+ | |
45 | * | pstate index | | |
46 | * +--------------------------+ | |
47 | */ | |
/* Slot offsets within one CPU's record; a record is MAP_OFF_NUM slots long */
#define MAP_OFF_CSTATE_TIME	0	/* timestamp of the last cstate event */
#define MAP_OFF_CSTATE_IDX	1	/* last cstate value seen */
#define MAP_OFF_PSTATE_TIME	2	/* timestamp used as pstate start time */
#define MAP_OFF_PSTATE_IDX	3	/* last pstate value seen */
#define MAP_OFF_NUM		4	/* number of slots per CPU */
53 | ||
f0c328f8 DL |
54 | struct { |
55 | __uint(type, BPF_MAP_TYPE_ARRAY); | |
56 | __type(key, u32); | |
57 | __type(value, u64); | |
58 | __uint(max_entries, MAX_CPU * MAP_OFF_NUM); | |
59 | } my_map SEC(".maps"); | |
c5350777 LY |
60 | |
61 | /* cstate_duration records duration time for every idle state per CPU */ | |
f0c328f8 DL |
62 | struct { |
63 | __uint(type, BPF_MAP_TYPE_ARRAY); | |
64 | __type(key, u32); | |
65 | __type(value, u64); | |
66 | __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES); | |
67 | } cstate_duration SEC(".maps"); | |
c5350777 LY |
68 | |
69 | /* pstate_duration records duration time for every operating point per CPU */ | |
f0c328f8 DL |
70 | struct { |
71 | __uint(type, BPF_MAP_TYPE_ARRAY); | |
72 | __type(key, u32); | |
73 | __type(value, u64); | |
74 | __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES); | |
75 | } pstate_duration SEC(".maps"); | |
c5350777 LY |
76 | |
77 | /* | |
78 | * The trace events for cpu_idle and cpu_frequency are taken from: | |
27d7fdf0 RZ |
79 | * /sys/kernel/tracing/events/power/cpu_idle/format |
80 | * /sys/kernel/tracing/events/power/cpu_frequency/format | |
c5350777 LY |
81 | * |
82 | * These two events have same format, so define one common structure. | |
83 | */ | |
84 | struct cpu_args { | |
85 | u64 pad; | |
86 | u32 state; | |
87 | u32 cpu_id; | |
88 | }; | |
89 | ||
90 | /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */ | |
91 | static u32 find_cpu_pstate_idx(u32 frequency) | |
92 | { | |
93 | u32 i; | |
94 | ||
95 | for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) { | |
96 | if (frequency == cpu_opps[i]) | |
97 | return i; | |
98 | } | |
99 | ||
100 | return i; | |
101 | } | |
102 | ||
103 | SEC("tracepoint/power/cpu_idle") | |
104 | int bpf_prog1(struct cpu_args *ctx) | |
105 | { | |
106 | u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta; | |
107 | u32 key, cpu, pstate_idx; | |
108 | u64 *val; | |
109 | ||
110 | if (ctx->cpu_id > MAX_CPU) | |
111 | return 0; | |
112 | ||
113 | cpu = ctx->cpu_id; | |
114 | ||
115 | key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME; | |
116 | cts = bpf_map_lookup_elem(&my_map, &key); | |
117 | if (!cts) | |
118 | return 0; | |
119 | ||
120 | key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; | |
121 | cstate = bpf_map_lookup_elem(&my_map, &key); | |
122 | if (!cstate) | |
123 | return 0; | |
124 | ||
125 | key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; | |
126 | pts = bpf_map_lookup_elem(&my_map, &key); | |
127 | if (!pts) | |
128 | return 0; | |
129 | ||
130 | key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; | |
131 | pstate = bpf_map_lookup_elem(&my_map, &key); | |
132 | if (!pstate) | |
133 | return 0; | |
134 | ||
135 | prev_state = *cstate; | |
136 | *cstate = ctx->state; | |
137 | ||
138 | if (!*cts) { | |
139 | *cts = bpf_ktime_get_ns(); | |
140 | return 0; | |
141 | } | |
142 | ||
143 | cur_ts = bpf_ktime_get_ns(); | |
144 | delta = cur_ts - *cts; | |
145 | *cts = cur_ts; | |
146 | ||
147 | /* | |
148 | * When state doesn't equal to (u32)-1, the cpu will enter | |
149 | * one idle state; for this case we need to record interval | |
150 | * for the pstate. | |
151 | * | |
152 | * OPP2 | |
153 | * +---------------------+ | |
154 | * OPP1 | | | |
155 | * ---------+ | | |
156 | * | Idle state | |
157 | * +--------------- | |
158 | * | |
159 | * |<- pstate duration ->| | |
160 | * ^ ^ | |
161 | * pts cur_ts | |
162 | */ | |
163 | if (ctx->state != (u32)-1) { | |
164 | ||
165 | /* record pstate after have first cpu_frequency event */ | |
166 | if (!*pts) | |
167 | return 0; | |
168 | ||
169 | delta = cur_ts - *pts; | |
170 | ||
171 | pstate_idx = find_cpu_pstate_idx(*pstate); | |
172 | if (pstate_idx >= MAX_PSTATE_ENTRIES) | |
173 | return 0; | |
174 | ||
175 | key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; | |
176 | val = bpf_map_lookup_elem(&pstate_duration, &key); | |
177 | if (val) | |
178 | __sync_fetch_and_add((long *)val, delta); | |
179 | ||
180 | /* | |
181 | * When state equal to (u32)-1, the cpu just exits from one | |
182 | * specific idle state; for this case we need to record | |
183 | * interval for the pstate. | |
184 | * | |
185 | * OPP2 | |
186 | * -----------+ | |
187 | * | OPP1 | |
188 | * | +----------- | |
189 | * | Idle state | | |
190 | * +---------------------+ | |
191 | * | |
192 | * |<- cstate duration ->| | |
193 | * ^ ^ | |
194 | * cts cur_ts | |
195 | */ | |
196 | } else { | |
197 | ||
198 | key = cpu * MAX_CSTATE_ENTRIES + prev_state; | |
199 | val = bpf_map_lookup_elem(&cstate_duration, &key); | |
200 | if (val) | |
201 | __sync_fetch_and_add((long *)val, delta); | |
202 | } | |
203 | ||
204 | /* Update timestamp for pstate as new start time */ | |
205 | if (*pts) | |
206 | *pts = cur_ts; | |
207 | ||
208 | return 0; | |
209 | } | |
210 | ||
211 | SEC("tracepoint/power/cpu_frequency") | |
212 | int bpf_prog2(struct cpu_args *ctx) | |
213 | { | |
214 | u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta; | |
215 | u32 key, cpu, pstate_idx; | |
216 | u64 *val; | |
217 | ||
218 | cpu = ctx->cpu_id; | |
219 | ||
220 | key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; | |
221 | pts = bpf_map_lookup_elem(&my_map, &key); | |
222 | if (!pts) | |
223 | return 0; | |
224 | ||
225 | key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; | |
226 | pstate = bpf_map_lookup_elem(&my_map, &key); | |
227 | if (!pstate) | |
228 | return 0; | |
229 | ||
230 | key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; | |
231 | cstate = bpf_map_lookup_elem(&my_map, &key); | |
232 | if (!cstate) | |
233 | return 0; | |
234 | ||
235 | prev_state = *pstate; | |
236 | *pstate = ctx->state; | |
237 | ||
238 | if (!*pts) { | |
239 | *pts = bpf_ktime_get_ns(); | |
240 | return 0; | |
241 | } | |
242 | ||
243 | cur_ts = bpf_ktime_get_ns(); | |
244 | delta = cur_ts - *pts; | |
245 | *pts = cur_ts; | |
246 | ||
247 | /* When CPU is in idle, bail out to skip pstate statistics */ | |
248 | if (*cstate != (u32)(-1)) | |
249 | return 0; | |
250 | ||
251 | /* | |
252 | * The cpu changes to another different OPP (in below diagram | |
253 | * change frequency from OPP3 to OPP1), need recording interval | |
254 | * for previous frequency OPP3 and update timestamp as start | |
255 | * time for new frequency OPP1. | |
256 | * | |
257 | * OPP3 | |
258 | * +---------------------+ | |
259 | * OPP2 | | | |
260 | * ---------+ | | |
261 | * | OPP1 | |
262 | * +--------------- | |
263 | * | |
264 | * |<- pstate duration ->| | |
265 | * ^ ^ | |
266 | * pts cur_ts | |
267 | */ | |
268 | pstate_idx = find_cpu_pstate_idx(*pstate); | |
269 | if (pstate_idx >= MAX_PSTATE_ENTRIES) | |
270 | return 0; | |
271 | ||
272 | key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; | |
273 | val = bpf_map_lookup_elem(&pstate_duration, &key); | |
274 | if (val) | |
275 | __sync_fetch_and_add((long *)val, delta); | |
276 | ||
277 | return 0; | |
278 | } | |
279 | ||
280 | char _license[] SEC("license") = "GPL"; | |
281 | u32 _version SEC("version") = LINUX_VERSION_CODE; |