Commit | Line | Data |
---|---|---|
f87027b9 JO |
1 | #include <stdio.h> |
2 | #include "evsel.h" | |
3 | #include "stat.h" | |
4 | #include "color.h" | |
fb4605ba | 5 | #include "pmu.h" |
37932c18 AK |
6 | #include "rblist.h" |
7 | #include "evlist.h" | |
8 | #include "expr.h" | |
b18f3e36 | 9 | #include "metricgroup.h" |
f87027b9 JO |
10 | |
/*
 * One bit per perf_event_attr exclude_* flag; the OR of the set bits is
 * used as the first index of every per-context stats array below, so
 * events with different exclusion masks do not mix their shadow stats
 * (see evsel_context()).
 */
enum {
	CTX_BIT_USER = 1 << 0,
	CTX_BIT_KERNEL = 1 << 1,
	CTX_BIT_HV = 1 << 2,
	CTX_BIT_HOST = 1 << 3,
	CTX_BIT_IDLE = 1 << 4,
	CTX_BIT_MAX = 1 << 5,
};

/* Number of distinct exclusion-mask contexts (array dimension). */
#define NUM_CTX CTX_BIT_MAX
21 | ||
/*
 * Which CPU slot aggregated counts are stored in, per aggregation mode:
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
/* Saved per-event values for MetricExpr evaluation, keyed by (cpu, evsel). */
static struct rblist runtime_saved_values;
/* Set at init when the "cpu" PMU exposes stalled-cycles-frontend. */
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;
54 | ||
37932c18 AK |
/*
 * One rbtree node per (cpu, evsel) pair, accumulating that event's
 * counts so metric expressions can read other events' averages.
 */
struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	int cpu;
	struct stats stats;
};
61 | ||
62 | static int saved_value_cmp(struct rb_node *rb_node, const void *entry) | |
63 | { | |
64 | struct saved_value *a = container_of(rb_node, | |
65 | struct saved_value, | |
66 | rb_node); | |
67 | const struct saved_value *b = entry; | |
68 | ||
37932c18 AK |
69 | if (a->cpu != b->cpu) |
70 | return a->cpu - b->cpu; | |
5e97665f AK |
71 | if (a->evsel == b->evsel) |
72 | return 0; | |
73 | if ((char *)a->evsel < (char *)b->evsel) | |
74 | return -1; | |
75 | return +1; | |
37932c18 AK |
76 | } |
77 | ||
78 | static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, | |
79 | const void *entry) | |
80 | { | |
81 | struct saved_value *nd = malloc(sizeof(struct saved_value)); | |
82 | ||
83 | if (!nd) | |
84 | return NULL; | |
85 | memcpy(nd, entry, sizeof(struct saved_value)); | |
86 | return &nd->rb_node; | |
87 | } | |
88 | ||
89 | static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, | |
4e1a0963 | 90 | int cpu, |
37932c18 AK |
91 | bool create) |
92 | { | |
93 | struct rb_node *nd; | |
94 | struct saved_value dm = { | |
95 | .cpu = cpu, | |
37932c18 AK |
96 | .evsel = evsel, |
97 | }; | |
98 | nd = rblist__find(&runtime_saved_values, &dm); | |
99 | if (nd) | |
100 | return container_of(nd, struct saved_value, rb_node); | |
101 | if (create) { | |
102 | rblist__add_node(&runtime_saved_values, &dm); | |
103 | nd = rblist__find(&runtime_saved_values, &dm); | |
104 | if (nd) | |
105 | return container_of(nd, struct saved_value, rb_node); | |
106 | } | |
107 | return NULL; | |
108 | } | |
109 | ||
fb4605ba AK |
110 | void perf_stat__init_shadow_stats(void) |
111 | { | |
112 | have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); | |
37932c18 AK |
113 | rblist__init(&runtime_saved_values); |
114 | runtime_saved_values.node_cmp = saved_value_cmp; | |
115 | runtime_saved_values.node_new = saved_value_new; | |
116 | /* No delete for now */ | |
fb4605ba AK |
117 | } |
118 | ||
f87027b9 JO |
119 | static int evsel_context(struct perf_evsel *evsel) |
120 | { | |
121 | int ctx = 0; | |
122 | ||
123 | if (evsel->attr.exclude_kernel) | |
124 | ctx |= CTX_BIT_KERNEL; | |
125 | if (evsel->attr.exclude_user) | |
126 | ctx |= CTX_BIT_USER; | |
127 | if (evsel->attr.exclude_hv) | |
128 | ctx |= CTX_BIT_HV; | |
129 | if (evsel->attr.exclude_host) | |
130 | ctx |= CTX_BIT_HOST; | |
131 | if (evsel->attr.exclude_idle) | |
132 | ctx |= CTX_BIT_IDLE; | |
133 | ||
134 | return ctx; | |
135 | } | |
136 | ||
137 | void perf_stat__reset_shadow_stats(void) | |
138 | { | |
37932c18 AK |
139 | struct rb_node *pos, *next; |
140 | ||
f87027b9 JO |
141 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); |
142 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | |
143 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | |
144 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | |
145 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | |
146 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | |
147 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | |
148 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | |
149 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | |
150 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | |
151 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | |
152 | memset(runtime_cycles_in_tx_stats, 0, | |
153 | sizeof(runtime_cycles_in_tx_stats)); | |
154 | memset(runtime_transaction_stats, 0, | |
155 | sizeof(runtime_transaction_stats)); | |
156 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | |
157 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | |
239bd47f AK |
158 | memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); |
159 | memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); | |
160 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); | |
161 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); | |
162 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); | |
daefd0bc KL |
163 | memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); |
164 | memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); | |
37932c18 AK |
165 | |
166 | next = rb_first(&runtime_saved_values.entries); | |
167 | while (next) { | |
168 | pos = next; | |
169 | next = rb_next(pos); | |
170 | memset(&container_of(pos, struct saved_value, rb_node)->stats, | |
171 | 0, | |
172 | sizeof(struct stats)); | |
173 | } | |
f87027b9 JO |
174 | } |
175 | ||
176 | /* | |
177 | * Update various tracking values we maintain to print | |
178 | * more semantic information such as miss/hit ratios, | |
179 | * instruction rates, etc: | |
180 | */ | |
181 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |
182 | int cpu) | |
183 | { | |
184 | int ctx = evsel_context(counter); | |
185 | ||
daf4f478 NK |
186 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || |
187 | perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) | |
f87027b9 JO |
188 | update_stats(&runtime_nsecs_stats[cpu], count[0]); |
189 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | |
190 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | |
191 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | |
54976285 | 192 | update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); |
f87027b9 JO |
193 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) |
194 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | |
195 | else if (perf_stat_evsel__is(counter, ELISION_START)) | |
196 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | |
239bd47f AK |
197 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) |
198 | update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); | |
199 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) | |
200 | update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); | |
201 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) | |
202 | update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); | |
203 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) | |
204 | update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); | |
205 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) | |
206 | update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); | |
f87027b9 JO |
207 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
208 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | |
209 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | |
210 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | |
211 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | |
212 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | |
213 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | |
214 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | |
215 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | |
216 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | |
217 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | |
218 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
219 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | |
220 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
221 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | |
222 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | |
223 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | |
224 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | |
daefd0bc KL |
225 | else if (perf_stat_evsel__is(counter, SMI_NUM)) |
226 | update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]); | |
227 | else if (perf_stat_evsel__is(counter, APERF)) | |
228 | update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); | |
37932c18 AK |
229 | |
230 | if (counter->collect_stat) { | |
4e1a0963 | 231 | struct saved_value *v = saved_value_lookup(counter, cpu, true); |
37932c18 AK |
232 | update_stats(&v->stats, count[0]); |
233 | } | |
f87027b9 JO |
234 | } |
235 | ||
/* used for get_ratio_color(): selects a row of color thresholds */
enum grc_type {
	GRC_STALLED_CYCLES_FE,	/* frontend stall percentage */
	GRC_STALLED_CYCLES_BE,	/* backend stall percentage */
	GRC_CACHE_MISSES,	/* cache/branch miss percentage */
	GRC_MAX_NR
};
243 | ||
244 | static const char *get_ratio_color(enum grc_type type, double ratio) | |
245 | { | |
246 | static const double grc_table[GRC_MAX_NR][3] = { | |
247 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | |
248 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | |
249 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | |
250 | }; | |
251 | const char *color = PERF_COLOR_NORMAL; | |
252 | ||
253 | if (ratio > grc_table[type][0]) | |
254 | color = PERF_COLOR_RED; | |
255 | else if (ratio > grc_table[type][1]) | |
256 | color = PERF_COLOR_MAGENTA; | |
257 | else if (ratio > grc_table[type][2]) | |
258 | color = PERF_COLOR_YELLOW; | |
259 | ||
260 | return color; | |
261 | } | |
262 | ||
37932c18 AK |
263 | static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list, |
264 | const char *name) | |
265 | { | |
266 | struct perf_evsel *c2; | |
267 | ||
268 | evlist__for_each_entry (evsel_list, c2) { | |
269 | if (!strcasecmp(c2->name, name)) | |
270 | return c2; | |
271 | } | |
272 | return NULL; | |
273 | } | |
274 | ||
275 | /* Mark MetricExpr target events and link events using them to them. */ | |
276 | void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) | |
277 | { | |
278 | struct perf_evsel *counter, *leader, **metric_events, *oc; | |
279 | bool found; | |
280 | const char **metric_names; | |
281 | int i; | |
282 | int num_metric_names; | |
283 | ||
284 | evlist__for_each_entry(evsel_list, counter) { | |
285 | bool invalid = false; | |
286 | ||
287 | leader = counter->leader; | |
288 | if (!counter->metric_expr) | |
289 | continue; | |
290 | metric_events = counter->metric_events; | |
291 | if (!metric_events) { | |
292 | if (expr__find_other(counter->metric_expr, counter->name, | |
293 | &metric_names, &num_metric_names) < 0) | |
294 | continue; | |
295 | ||
296 | metric_events = calloc(sizeof(struct perf_evsel *), | |
297 | num_metric_names + 1); | |
298 | if (!metric_events) | |
299 | return; | |
300 | counter->metric_events = metric_events; | |
301 | } | |
302 | ||
303 | for (i = 0; i < num_metric_names; i++) { | |
304 | found = false; | |
305 | if (leader) { | |
306 | /* Search in group */ | |
307 | for_each_group_member (oc, leader) { | |
308 | if (!strcasecmp(oc->name, metric_names[i])) { | |
309 | found = true; | |
310 | break; | |
311 | } | |
312 | } | |
313 | } | |
314 | if (!found) { | |
315 | /* Search ignoring groups */ | |
316 | oc = perf_stat__find_event(evsel_list, metric_names[i]); | |
317 | } | |
318 | if (!oc) { | |
319 | /* Deduping one is good enough to handle duplicated PMUs. */ | |
320 | static char *printed; | |
321 | ||
322 | /* | |
323 | * Adding events automatically would be difficult, because | |
324 | * it would risk creating groups that are not schedulable. | |
325 | * perf stat doesn't understand all the scheduling constraints | |
326 | * of events. So we ask the user instead to add the missing | |
327 | * events. | |
328 | */ | |
329 | if (!printed || strcasecmp(printed, metric_names[i])) { | |
330 | fprintf(stderr, | |
331 | "Add %s event to groups to get metric expression for %s\n", | |
332 | metric_names[i], | |
333 | counter->name); | |
334 | printed = strdup(metric_names[i]); | |
335 | } | |
336 | invalid = true; | |
337 | continue; | |
338 | } | |
339 | metric_events[i] = oc; | |
340 | oc->collect_stat = true; | |
341 | } | |
342 | metric_events[i] = NULL; | |
343 | free(metric_names); | |
344 | if (invalid) { | |
345 | free(metric_events); | |
346 | counter->metric_events = NULL; | |
347 | counter->metric_expr = NULL; | |
348 | } | |
349 | } | |
350 | } | |
351 | ||
140aeadc | 352 | static void print_stalled_cycles_frontend(int cpu, |
b8f8eb84 | 353 | struct perf_evsel *evsel, double avg, |
140aeadc | 354 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
355 | { |
356 | double total, ratio = 0.0; | |
357 | const char *color; | |
358 | int ctx = evsel_context(evsel); | |
359 | ||
360 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
361 | ||
362 | if (total) | |
363 | ratio = avg / total * 100.0; | |
364 | ||
365 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | |
366 | ||
140aeadc AK |
367 | if (ratio) |
368 | out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", | |
369 | ratio); | |
370 | else | |
371 | out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); | |
f87027b9 JO |
372 | } |
373 | ||
140aeadc | 374 | static void print_stalled_cycles_backend(int cpu, |
b8f8eb84 | 375 | struct perf_evsel *evsel, double avg, |
140aeadc | 376 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
377 | { |
378 | double total, ratio = 0.0; | |
379 | const char *color; | |
380 | int ctx = evsel_context(evsel); | |
381 | ||
382 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
383 | ||
384 | if (total) | |
385 | ratio = avg / total * 100.0; | |
386 | ||
387 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | |
388 | ||
b0404be8 | 389 | out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); |
f87027b9 JO |
390 | } |
391 | ||
140aeadc | 392 | static void print_branch_misses(int cpu, |
b8f8eb84 | 393 | struct perf_evsel *evsel, |
140aeadc AK |
394 | double avg, |
395 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
396 | { |
397 | double total, ratio = 0.0; | |
398 | const char *color; | |
399 | int ctx = evsel_context(evsel); | |
400 | ||
401 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | |
402 | ||
403 | if (total) | |
404 | ratio = avg / total * 100.0; | |
405 | ||
406 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
407 | ||
140aeadc | 408 | out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); |
f87027b9 JO |
409 | } |
410 | ||
140aeadc | 411 | static void print_l1_dcache_misses(int cpu, |
b8f8eb84 | 412 | struct perf_evsel *evsel, |
140aeadc AK |
413 | double avg, |
414 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
415 | { |
416 | double total, ratio = 0.0; | |
417 | const char *color; | |
418 | int ctx = evsel_context(evsel); | |
419 | ||
420 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | |
421 | ||
422 | if (total) | |
423 | ratio = avg / total * 100.0; | |
424 | ||
425 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
426 | ||
140aeadc | 427 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); |
f87027b9 JO |
428 | } |
429 | ||
140aeadc | 430 | static void print_l1_icache_misses(int cpu, |
b8f8eb84 | 431 | struct perf_evsel *evsel, |
140aeadc AK |
432 | double avg, |
433 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
434 | { |
435 | double total, ratio = 0.0; | |
436 | const char *color; | |
437 | int ctx = evsel_context(evsel); | |
438 | ||
439 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | |
440 | ||
441 | if (total) | |
442 | ratio = avg / total * 100.0; | |
443 | ||
444 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 445 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); |
f87027b9 JO |
446 | } |
447 | ||
140aeadc | 448 | static void print_dtlb_cache_misses(int cpu, |
b8f8eb84 | 449 | struct perf_evsel *evsel, |
140aeadc AK |
450 | double avg, |
451 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
452 | { |
453 | double total, ratio = 0.0; | |
454 | const char *color; | |
455 | int ctx = evsel_context(evsel); | |
456 | ||
457 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | |
458 | ||
459 | if (total) | |
460 | ratio = avg / total * 100.0; | |
461 | ||
462 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 463 | out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); |
f87027b9 JO |
464 | } |
465 | ||
140aeadc | 466 | static void print_itlb_cache_misses(int cpu, |
b8f8eb84 | 467 | struct perf_evsel *evsel, |
140aeadc AK |
468 | double avg, |
469 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
470 | { |
471 | double total, ratio = 0.0; | |
472 | const char *color; | |
473 | int ctx = evsel_context(evsel); | |
474 | ||
475 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | |
476 | ||
477 | if (total) | |
478 | ratio = avg / total * 100.0; | |
479 | ||
480 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 481 | out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); |
f87027b9 JO |
482 | } |
483 | ||
140aeadc | 484 | static void print_ll_cache_misses(int cpu, |
b8f8eb84 | 485 | struct perf_evsel *evsel, |
140aeadc AK |
486 | double avg, |
487 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
488 | { |
489 | double total, ratio = 0.0; | |
490 | const char *color; | |
491 | int ctx = evsel_context(evsel); | |
492 | ||
493 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | |
494 | ||
495 | if (total) | |
496 | ratio = avg / total * 100.0; | |
497 | ||
498 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 499 | out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); |
f87027b9 JO |
500 | } |
501 | ||
239bd47f AK |
502 | /* |
503 | * High level "TopDown" CPU core pipe line bottleneck break down. | |
504 | * | |
505 | * Basic concept following | |
506 | * Yasin, A Top Down Method for Performance analysis and Counter architecture | |
507 | * ISPASS14 | |
508 | * | |
509 | * The CPU pipeline is divided into 4 areas that can be bottlenecks: | |
510 | * | |
511 | * Frontend -> Backend -> Retiring | |
512 | * BadSpeculation in addition means out of order execution that is thrown away | |
513 | * (for example branch mispredictions) | |
514 | * Frontend is instruction decoding. | |
515 | * Backend is execution, like computation and accessing data in memory | |
516 | * Retiring is good execution that is not directly bottlenecked | |
517 | * | |
518 | * The formulas are computed in slots. | |
519 | * A slot is an entry in the pipeline each for the pipeline width | |
520 | * (for example a 4-wide pipeline has 4 slots for each cycle) | |
521 | * | |
522 | * Formulas: | |
523 | * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / | |
524 | * TotalSlots | |
525 | * Retiring = SlotsRetired / TotalSlots | |
526 | * FrontendBound = FetchBubbles / TotalSlots | |
527 | * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound | |
528 | * | |
529 | * The kernel provides the mapping to the low level CPU events and any scaling | |
530 | * needed for the CPU pipeline width, for example: | |
531 | * | |
532 | * TotalSlots = Cycles * 4 | |
533 | * | |
534 | * The scaling factor is communicated in the sysfs unit. | |
535 | * | |
536 | * In some cases the CPU may not be able to measure all the formulas due to | |
537 | * missing events. In this case multiple formulas are combined, as possible. | |
538 | * | |
539 | * Full TopDown supports more levels to sub-divide each area: for example | |
540 | * BackendBound into computing bound and memory bound. For now we only | |
541 | * support Level 1 TopDown. | |
542 | */ | |
543 | ||
/*
 * Clamp tiny negative values (up to -2%, typically measurement noise
 * from counter multiplexing) to zero; larger negatives pass through
 * unchanged so real errors stay visible.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
550 | ||
/* Average of the topdown total-slots counts for this context and CPU. */
static double td_total_slots(int ctx, int cpu)
{
	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
}
555 | ||
556 | static double td_bad_spec(int ctx, int cpu) | |
557 | { | |
558 | double bad_spec = 0; | |
559 | double total_slots; | |
560 | double total; | |
561 | ||
562 | total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - | |
563 | avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + | |
564 | avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); | |
565 | total_slots = td_total_slots(ctx, cpu); | |
566 | if (total_slots) | |
567 | bad_spec = total / total_slots; | |
568 | return sanitize_val(bad_spec); | |
569 | } | |
570 | ||
571 | static double td_retiring(int ctx, int cpu) | |
572 | { | |
573 | double retiring = 0; | |
574 | double total_slots = td_total_slots(ctx, cpu); | |
575 | double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); | |
576 | ||
577 | if (total_slots) | |
578 | retiring = ret_slots / total_slots; | |
579 | return retiring; | |
580 | } | |
581 | ||
582 | static double td_fe_bound(int ctx, int cpu) | |
583 | { | |
584 | double fe_bound = 0; | |
585 | double total_slots = td_total_slots(ctx, cpu); | |
586 | double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); | |
587 | ||
588 | if (total_slots) | |
589 | fe_bound = fetch_bub / total_slots; | |
590 | return fe_bound; | |
591 | } | |
592 | ||
/*
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound.
 * When every other component is zero there is no topdown data at all,
 * so report 0 rather than a misleading 1.0.
 */
static double td_be_bound(int ctx, int cpu)
{
	double others = td_fe_bound(ctx, cpu) +
			td_bad_spec(ctx, cpu) +
			td_retiring(ctx, cpu);

	if (others == 0)
		return 0;
	return sanitize_val(1.0 - others);
}
602 | ||
daefd0bc KL |
603 | static void print_smi_cost(int cpu, struct perf_evsel *evsel, |
604 | struct perf_stat_output_ctx *out) | |
605 | { | |
606 | double smi_num, aperf, cycles, cost = 0.0; | |
607 | int ctx = evsel_context(evsel); | |
608 | const char *color = NULL; | |
609 | ||
610 | smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); | |
611 | aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); | |
612 | cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
613 | ||
614 | if ((cycles == 0) || (aperf == 0)) | |
615 | return; | |
616 | ||
617 | if (smi_num) | |
618 | cost = (aperf - cycles) / aperf * 100.00; | |
619 | ||
620 | if (cost > 10) | |
621 | color = PERF_COLOR_RED; | |
622 | out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); | |
623 | out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); | |
624 | } | |
625 | ||
bba49af8 AK |
/*
 * Evaluate a MetricExpr for the event @name and print the result.
 * Each referenced event's average (scaled) value is registered with the
 * expression parser; if any referenced event has no saved data the
 * metric is printed blank instead of evaluated.
 */
static void generic_metric(const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	/* The event being printed can reference itself by name. */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			/* Wall time is tracked globally, in nanoseconds. */
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false);
			if (!v)
				/* missing data; leaves metric_events[i] != NULL */
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
	}
	/* Only evaluate when the loop above consumed every event. */
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(ctxp, NULL, "%8.1f",
				metric_name ?
				metric_name :
				out->force_header ?  name : "",
				ratio);
		else
			print_metric(ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(ctxp, NULL, NULL, "", 0);
}
675 | ||
140aeadc AK |
676 | void perf_stat__print_shadow_stats(struct perf_evsel *evsel, |
677 | double avg, int cpu, | |
b18f3e36 AK |
678 | struct perf_stat_output_ctx *out, |
679 | struct rblist *metric_events) | |
f87027b9 | 680 | { |
140aeadc AK |
681 | void *ctxp = out->ctx; |
682 | print_metric_t print_metric = out->print_metric; | |
f87027b9 | 683 | double total, ratio = 0.0, total2; |
239bd47f | 684 | const char *color = NULL; |
f87027b9 | 685 | int ctx = evsel_context(evsel); |
b18f3e36 AK |
686 | struct metric_event *me; |
687 | int num = 1; | |
f87027b9 JO |
688 | |
689 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | |
690 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
691 | if (total) { | |
692 | ratio = avg / total; | |
140aeadc AK |
693 | print_metric(ctxp, NULL, "%7.2f ", |
694 | "insn per cycle", ratio); | |
f87027b9 | 695 | } else { |
140aeadc | 696 | print_metric(ctxp, NULL, NULL, "insn per cycle", 0); |
f87027b9 JO |
697 | } |
698 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | |
699 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | |
700 | ||
701 | if (total && avg) { | |
92a61f64 | 702 | out->new_line(ctxp); |
f87027b9 | 703 | ratio = total / avg; |
140aeadc AK |
704 | print_metric(ctxp, NULL, "%7.2f ", |
705 | "stalled cycles per insn", | |
706 | ratio); | |
fb4605ba | 707 | } else if (have_frontend_stalled) { |
140aeadc AK |
708 | print_metric(ctxp, NULL, NULL, |
709 | "stalled cycles per insn", 0); | |
f87027b9 | 710 | } |
140aeadc AK |
711 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { |
712 | if (runtime_branches_stats[ctx][cpu].n != 0) | |
713 | print_branch_misses(cpu, evsel, avg, out); | |
714 | else | |
715 | print_metric(ctxp, NULL, NULL, "of all branches", 0); | |
f87027b9 JO |
716 | } else if ( |
717 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
718 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | |
719 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
720 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
721 | if (runtime_l1_dcache_stats[ctx][cpu].n != 0) | |
722 | print_l1_dcache_misses(cpu, evsel, avg, out); | |
723 | else | |
724 | print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); | |
f87027b9 JO |
725 | } else if ( |
726 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
727 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | |
728 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
729 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
730 | if (runtime_l1_icache_stats[ctx][cpu].n != 0) | |
731 | print_l1_icache_misses(cpu, evsel, avg, out); | |
732 | else | |
733 | print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); | |
f87027b9 JO |
734 | } else if ( |
735 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
736 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | |
737 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
738 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
739 | if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) | |
740 | print_dtlb_cache_misses(cpu, evsel, avg, out); | |
741 | else | |
742 | print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); | |
f87027b9 JO |
743 | } else if ( |
744 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
745 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | |
746 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
747 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
748 | if (runtime_itlb_cache_stats[ctx][cpu].n != 0) | |
749 | print_itlb_cache_misses(cpu, evsel, avg, out); | |
750 | else | |
751 | print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); | |
f87027b9 JO |
752 | } else if ( |
753 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
754 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | |
755 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
756 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
757 | if (runtime_ll_cache_stats[ctx][cpu].n != 0) | |
758 | print_ll_cache_misses(cpu, evsel, avg, out); | |
759 | else | |
760 | print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); | |
761 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { | |
f87027b9 JO |
762 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); |
763 | ||
764 | if (total) | |
765 | ratio = avg * 100 / total; | |
766 | ||
140aeadc AK |
767 | if (runtime_cacherefs_stats[ctx][cpu].n != 0) |
768 | print_metric(ctxp, NULL, "%8.3f %%", | |
769 | "of all cache refs", ratio); | |
770 | else | |
771 | print_metric(ctxp, NULL, NULL, "of all cache refs", 0); | |
f87027b9 | 772 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { |
140aeadc | 773 | print_stalled_cycles_frontend(cpu, evsel, avg, out); |
f87027b9 | 774 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { |
140aeadc | 775 | print_stalled_cycles_backend(cpu, evsel, avg, out); |
f87027b9 JO |
776 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { |
777 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
778 | ||
779 | if (total) { | |
780 | ratio = avg / total; | |
140aeadc | 781 | print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); |
f87027b9 | 782 | } else { |
140aeadc | 783 | print_metric(ctxp, NULL, NULL, "Ghz", 0); |
f87027b9 JO |
784 | } |
785 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | |
786 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
787 | if (total) | |
140aeadc AK |
788 | print_metric(ctxp, NULL, |
789 | "%7.2f%%", "transactional cycles", | |
790 | 100.0 * (avg / total)); | |
791 | else | |
792 | print_metric(ctxp, NULL, NULL, "transactional cycles", | |
793 | 0); | |
f87027b9 JO |
794 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { |
795 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
796 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | |
797 | if (total2 < avg) | |
798 | total2 = avg; | |
799 | if (total) | |
140aeadc | 800 | print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", |
f87027b9 | 801 | 100.0 * ((total2-avg) / total)); |
140aeadc AK |
802 | else |
803 | print_metric(ctxp, NULL, NULL, "aborted cycles", 0); | |
804 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { | |
f87027b9 JO |
805 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
806 | ||
54976285 | 807 | if (avg) |
f87027b9 JO |
808 | ratio = total / avg; |
809 | ||
140aeadc AK |
810 | if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) |
811 | print_metric(ctxp, NULL, "%8.0f", | |
812 | "cycles / transaction", ratio); | |
813 | else | |
814 | print_metric(ctxp, NULL, NULL, "cycles / transaction", | |
815 | 0); | |
816 | } else if (perf_stat_evsel__is(evsel, ELISION_START)) { | |
f87027b9 JO |
817 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
818 | ||
54976285 | 819 | if (avg) |
f87027b9 JO |
820 | ratio = total / avg; |
821 | ||
140aeadc | 822 | print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); |
daf4f478 NK |
823 | } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || |
824 | perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { | |
4579ecc8 | 825 | if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) |
140aeadc AK |
826 | print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", |
827 | avg / ratio); | |
4579ecc8 | 828 | else |
140aeadc | 829 | print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); |
239bd47f AK |
830 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { |
831 | double fe_bound = td_fe_bound(ctx, cpu); | |
832 | ||
833 | if (fe_bound > 0.2) | |
834 | color = PERF_COLOR_RED; | |
835 | print_metric(ctxp, color, "%8.1f%%", "frontend bound", | |
836 | fe_bound * 100.); | |
837 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { | |
838 | double retiring = td_retiring(ctx, cpu); | |
839 | ||
840 | if (retiring > 0.7) | |
841 | color = PERF_COLOR_GREEN; | |
842 | print_metric(ctxp, color, "%8.1f%%", "retiring", | |
843 | retiring * 100.); | |
844 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { | |
845 | double bad_spec = td_bad_spec(ctx, cpu); | |
846 | ||
847 | if (bad_spec > 0.1) | |
848 | color = PERF_COLOR_RED; | |
849 | print_metric(ctxp, color, "%8.1f%%", "bad speculation", | |
850 | bad_spec * 100.); | |
851 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { | |
852 | double be_bound = td_be_bound(ctx, cpu); | |
853 | const char *name = "backend bound"; | |
854 | static int have_recovery_bubbles = -1; | |
855 | ||
856 | /* In case the CPU does not support topdown-recovery-bubbles */ | |
857 | if (have_recovery_bubbles < 0) | |
858 | have_recovery_bubbles = pmu_have_event("cpu", | |
859 | "topdown-recovery-bubbles"); | |
860 | if (!have_recovery_bubbles) | |
861 | name = "backend bound/bad spec"; | |
862 | ||
863 | if (be_bound > 0.2) | |
864 | color = PERF_COLOR_RED; | |
865 | if (td_total_slots(ctx, cpu) > 0) | |
866 | print_metric(ctxp, color, "%8.1f%%", name, | |
867 | be_bound * 100.); | |
868 | else | |
869 | print_metric(ctxp, NULL, NULL, name, 0); | |
37932c18 | 870 | } else if (evsel->metric_expr) { |
bba49af8 | 871 | generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, |
4e1a0963 | 872 | evsel->metric_name, avg, cpu, out); |
f87027b9 JO |
873 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
874 | char unit = 'M'; | |
140aeadc | 875 | char unit_buf[10]; |
f87027b9 JO |
876 | |
877 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
878 | ||
879 | if (total) | |
880 | ratio = 1000.0 * avg / total; | |
881 | if (ratio < 0.001) { | |
882 | ratio *= 1000; | |
883 | unit = 'K'; | |
884 | } | |
140aeadc AK |
885 | snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); |
886 | print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); | |
daefd0bc KL |
887 | } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { |
888 | print_smi_cost(cpu, evsel, out); | |
f87027b9 | 889 | } else { |
b18f3e36 | 890 | num = 0; |
f87027b9 | 891 | } |
b18f3e36 AK |
892 | |
893 | if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { | |
894 | struct metric_expr *mexp; | |
895 | ||
896 | list_for_each_entry (mexp, &me->head, nd) { | |
897 | if (num++ > 0) | |
898 | out->new_line(ctxp); | |
899 | generic_metric(mexp->metric_expr, mexp->metric_events, | |
900 | evsel->name, mexp->metric_name, | |
4e1a0963 | 901 | avg, cpu, out); |
b18f3e36 AK |
902 | } |
903 | } | |
904 | if (num == 0) | |
905 | print_metric(ctxp, NULL, NULL, NULL, 0); | |
f87027b9 | 906 | } |