Commit | Line | Data |
---|---|---|
f87027b9 JO |
1 | #include <stdio.h> |
2 | #include "evsel.h" | |
3 | #include "stat.h" | |
4 | #include "color.h" | |
fb4605ba | 5 | #include "pmu.h" |
f87027b9 JO |
6 | |
/*
 * Context bits: evsel_context() sets one bit per exclusion flag found in
 * the event's attr (exclude_user, exclude_kernel, exclude_hv, exclude_host,
 * exclude_idle).  The OR of those bits is the first index into the
 * per-context shadow statistic arrays.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,	/* one past the largest possible mask */
};

/* Number of distinct context masks (0 .. CTX_BIT_MAX - 1). */
#define NUM_CTX CTX_BIT_MAX
17 | ||
44d49a60 AK |
18 | /* |
19 | * AGGR_GLOBAL: Use CPU 0 | |
20 | * AGGR_SOCKET: Use first CPU of socket | |
21 | * AGGR_CORE: Use first CPU of core | |
22 | * AGGR_NONE: Use matching CPU | |
23 | * AGGR_THREAD: Not supported? | |
24 | */ | |
f87027b9 JO |
25 | static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; |
26 | static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; | |
27 | static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; | |
28 | static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; | |
29 | static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; | |
30 | static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; | |
31 | static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; | |
32 | static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; | |
33 | static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
34 | static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
35 | static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; | |
36 | static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; | |
37 | static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; | |
38 | static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; | |
239bd47f AK |
39 | static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; |
40 | static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; | |
41 | static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; | |
42 | static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; | |
43 | static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; | |
fb4605ba | 44 | static bool have_frontend_stalled; |
f87027b9 JO |
45 | |
46 | struct stats walltime_nsecs_stats; | |
47 | ||
fb4605ba AK |
48 | void perf_stat__init_shadow_stats(void) |
49 | { | |
50 | have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); | |
51 | } | |
52 | ||
f87027b9 JO |
53 | static int evsel_context(struct perf_evsel *evsel) |
54 | { | |
55 | int ctx = 0; | |
56 | ||
57 | if (evsel->attr.exclude_kernel) | |
58 | ctx |= CTX_BIT_KERNEL; | |
59 | if (evsel->attr.exclude_user) | |
60 | ctx |= CTX_BIT_USER; | |
61 | if (evsel->attr.exclude_hv) | |
62 | ctx |= CTX_BIT_HV; | |
63 | if (evsel->attr.exclude_host) | |
64 | ctx |= CTX_BIT_HOST; | |
65 | if (evsel->attr.exclude_idle) | |
66 | ctx |= CTX_BIT_IDLE; | |
67 | ||
68 | return ctx; | |
69 | } | |
70 | ||
71 | void perf_stat__reset_shadow_stats(void) | |
72 | { | |
73 | memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); | |
74 | memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); | |
75 | memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); | |
76 | memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); | |
77 | memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); | |
78 | memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); | |
79 | memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); | |
80 | memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); | |
81 | memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); | |
82 | memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); | |
83 | memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); | |
84 | memset(runtime_cycles_in_tx_stats, 0, | |
85 | sizeof(runtime_cycles_in_tx_stats)); | |
86 | memset(runtime_transaction_stats, 0, | |
87 | sizeof(runtime_transaction_stats)); | |
88 | memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); | |
89 | memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); | |
239bd47f AK |
90 | memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); |
91 | memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); | |
92 | memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); | |
93 | memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); | |
94 | memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); | |
f87027b9 JO |
95 | } |
96 | ||
97 | /* | |
98 | * Update various tracking values we maintain to print | |
99 | * more semantic information such as miss/hit ratios, | |
100 | * instruction rates, etc: | |
101 | */ | |
102 | void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |
103 | int cpu) | |
104 | { | |
105 | int ctx = evsel_context(counter); | |
106 | ||
daf4f478 NK |
107 | if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || |
108 | perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) | |
f87027b9 JO |
109 | update_stats(&runtime_nsecs_stats[cpu], count[0]); |
110 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | |
111 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | |
112 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | |
54976285 | 113 | update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); |
f87027b9 JO |
114 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) |
115 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | |
116 | else if (perf_stat_evsel__is(counter, ELISION_START)) | |
117 | update_stats(&runtime_elision_stats[ctx][cpu], count[0]); | |
239bd47f AK |
118 | else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) |
119 | update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); | |
120 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) | |
121 | update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); | |
122 | else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) | |
123 | update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); | |
124 | else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) | |
125 | update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]); | |
126 | else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) | |
127 | update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); | |
f87027b9 JO |
128 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) |
129 | update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); | |
130 | else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) | |
131 | update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); | |
132 | else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | |
133 | update_stats(&runtime_branches_stats[ctx][cpu], count[0]); | |
134 | else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) | |
135 | update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); | |
136 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) | |
137 | update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); | |
138 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) | |
139 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
140 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) | |
141 | update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); | |
142 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) | |
143 | update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); | |
144 | else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) | |
145 | update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); | |
146 | } | |
147 | ||
/*
 * Row selector for the threshold table inside get_ratio_color().
 * GRC_MAX_NR is the number of rows, not a valid selector.
 */
enum grc_type {
	GRC_STALLED_CYCLES_FE,	/* frontend stalled cycles */
	GRC_STALLED_CYCLES_BE,	/* backend stalled cycles */
	GRC_CACHE_MISSES,	/* cache/TLB/branch miss ratios */
	GRC_MAX_NR
};
155 | ||
156 | static const char *get_ratio_color(enum grc_type type, double ratio) | |
157 | { | |
158 | static const double grc_table[GRC_MAX_NR][3] = { | |
159 | [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, | |
160 | [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, | |
161 | [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, | |
162 | }; | |
163 | const char *color = PERF_COLOR_NORMAL; | |
164 | ||
165 | if (ratio > grc_table[type][0]) | |
166 | color = PERF_COLOR_RED; | |
167 | else if (ratio > grc_table[type][1]) | |
168 | color = PERF_COLOR_MAGENTA; | |
169 | else if (ratio > grc_table[type][2]) | |
170 | color = PERF_COLOR_YELLOW; | |
171 | ||
172 | return color; | |
173 | } | |
174 | ||
140aeadc | 175 | static void print_stalled_cycles_frontend(int cpu, |
b8f8eb84 | 176 | struct perf_evsel *evsel, double avg, |
140aeadc | 177 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
178 | { |
179 | double total, ratio = 0.0; | |
180 | const char *color; | |
181 | int ctx = evsel_context(evsel); | |
182 | ||
183 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
184 | ||
185 | if (total) | |
186 | ratio = avg / total * 100.0; | |
187 | ||
188 | color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); | |
189 | ||
140aeadc AK |
190 | if (ratio) |
191 | out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", | |
192 | ratio); | |
193 | else | |
194 | out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); | |
f87027b9 JO |
195 | } |
196 | ||
140aeadc | 197 | static void print_stalled_cycles_backend(int cpu, |
b8f8eb84 | 198 | struct perf_evsel *evsel, double avg, |
140aeadc | 199 | struct perf_stat_output_ctx *out) |
f87027b9 JO |
200 | { |
201 | double total, ratio = 0.0; | |
202 | const char *color; | |
203 | int ctx = evsel_context(evsel); | |
204 | ||
205 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
206 | ||
207 | if (total) | |
208 | ratio = avg / total * 100.0; | |
209 | ||
210 | color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); | |
211 | ||
b0404be8 | 212 | out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); |
f87027b9 JO |
213 | } |
214 | ||
140aeadc | 215 | static void print_branch_misses(int cpu, |
b8f8eb84 | 216 | struct perf_evsel *evsel, |
140aeadc AK |
217 | double avg, |
218 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
219 | { |
220 | double total, ratio = 0.0; | |
221 | const char *color; | |
222 | int ctx = evsel_context(evsel); | |
223 | ||
224 | total = avg_stats(&runtime_branches_stats[ctx][cpu]); | |
225 | ||
226 | if (total) | |
227 | ratio = avg / total * 100.0; | |
228 | ||
229 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
230 | ||
140aeadc | 231 | out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); |
f87027b9 JO |
232 | } |
233 | ||
140aeadc | 234 | static void print_l1_dcache_misses(int cpu, |
b8f8eb84 | 235 | struct perf_evsel *evsel, |
140aeadc AK |
236 | double avg, |
237 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
238 | { |
239 | double total, ratio = 0.0; | |
240 | const char *color; | |
241 | int ctx = evsel_context(evsel); | |
242 | ||
243 | total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); | |
244 | ||
245 | if (total) | |
246 | ratio = avg / total * 100.0; | |
247 | ||
248 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
249 | ||
140aeadc | 250 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); |
f87027b9 JO |
251 | } |
252 | ||
140aeadc | 253 | static void print_l1_icache_misses(int cpu, |
b8f8eb84 | 254 | struct perf_evsel *evsel, |
140aeadc AK |
255 | double avg, |
256 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
257 | { |
258 | double total, ratio = 0.0; | |
259 | const char *color; | |
260 | int ctx = evsel_context(evsel); | |
261 | ||
262 | total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); | |
263 | ||
264 | if (total) | |
265 | ratio = avg / total * 100.0; | |
266 | ||
267 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 268 | out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); |
f87027b9 JO |
269 | } |
270 | ||
140aeadc | 271 | static void print_dtlb_cache_misses(int cpu, |
b8f8eb84 | 272 | struct perf_evsel *evsel, |
140aeadc AK |
273 | double avg, |
274 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
275 | { |
276 | double total, ratio = 0.0; | |
277 | const char *color; | |
278 | int ctx = evsel_context(evsel); | |
279 | ||
280 | total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); | |
281 | ||
282 | if (total) | |
283 | ratio = avg / total * 100.0; | |
284 | ||
285 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 286 | out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); |
f87027b9 JO |
287 | } |
288 | ||
140aeadc | 289 | static void print_itlb_cache_misses(int cpu, |
b8f8eb84 | 290 | struct perf_evsel *evsel, |
140aeadc AK |
291 | double avg, |
292 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
293 | { |
294 | double total, ratio = 0.0; | |
295 | const char *color; | |
296 | int ctx = evsel_context(evsel); | |
297 | ||
298 | total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); | |
299 | ||
300 | if (total) | |
301 | ratio = avg / total * 100.0; | |
302 | ||
303 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 304 | out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); |
f87027b9 JO |
305 | } |
306 | ||
140aeadc | 307 | static void print_ll_cache_misses(int cpu, |
b8f8eb84 | 308 | struct perf_evsel *evsel, |
140aeadc AK |
309 | double avg, |
310 | struct perf_stat_output_ctx *out) | |
f87027b9 JO |
311 | { |
312 | double total, ratio = 0.0; | |
313 | const char *color; | |
314 | int ctx = evsel_context(evsel); | |
315 | ||
316 | total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); | |
317 | ||
318 | if (total) | |
319 | ratio = avg / total * 100.0; | |
320 | ||
321 | color = get_ratio_color(GRC_CACHE_MISSES, ratio); | |
140aeadc | 322 | out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); |
f87027b9 JO |
323 | } |
324 | ||
239bd47f AK |
325 | /* |
326 | * High level "TopDown" CPU core pipe line bottleneck break down. | |
327 | * | |
328 | * Basic concept following | |
329 | * Yasin, A Top Down Method for Performance analysis and Counter architecture | |
330 | * ISPASS14 | |
331 | * | |
332 | * The CPU pipeline is divided into 4 areas that can be bottlenecks: | |
333 | * | |
334 | * Frontend -> Backend -> Retiring | |
335 | * BadSpeculation in addition means out of order execution that is thrown away | |
336 | * (for example branch mispredictions) | |
337 | * Frontend is instruction decoding. | |
338 | * Backend is execution, like computation and accessing data in memory | |
339 | * Retiring is good execution that is not directly bottlenecked | |
340 | * | |
341 | * The formulas are computed in slots. | |
342 | * A slot is one pipeline entry; each cycle provides as many slots as the | |
343 | * pipeline is wide (for example, a 4-wide pipeline has 4 slots per cycle) | |
344 | * | |
345 | * Formulas: | |
346 | * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / | |
347 | * TotalSlots | |
348 | * Retiring = SlotsRetired / TotalSlots | |
349 | * FrontendBound = FetchBubbles / TotalSlots | |
350 | * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound | |
351 | * | |
352 | * The kernel provides the mapping to the low level CPU events and any scaling | |
353 | * needed for the CPU pipeline width, for example: | |
354 | * | |
355 | * TotalSlots = Cycles * 4 | |
356 | * | |
357 | * The scaling factor is communicated in the sysfs unit. | |
358 | * | |
359 | * In some cases the CPU may not be able to measure all the formulas due to | |
360 | * missing events. In this case multiple formulas are combined, as possible. | |
361 | * | |
362 | * Full TopDown supports more levels to sub-divide each area: for example | |
363 | * BackendBound into computing bound and memory bound. For now we only | |
364 | * support Level 1 TopDown. | |
365 | */ | |
366 | ||
/*
 * Clamp slightly negative values (in the range [-0.02, 0)) to zero;
 * every other value is returned unchanged.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
373 | ||
374 | static double td_total_slots(int ctx, int cpu) | |
375 | { | |
376 | return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); | |
377 | } | |
378 | ||
379 | static double td_bad_spec(int ctx, int cpu) | |
380 | { | |
381 | double bad_spec = 0; | |
382 | double total_slots; | |
383 | double total; | |
384 | ||
385 | total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - | |
386 | avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + | |
387 | avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); | |
388 | total_slots = td_total_slots(ctx, cpu); | |
389 | if (total_slots) | |
390 | bad_spec = total / total_slots; | |
391 | return sanitize_val(bad_spec); | |
392 | } | |
393 | ||
394 | static double td_retiring(int ctx, int cpu) | |
395 | { | |
396 | double retiring = 0; | |
397 | double total_slots = td_total_slots(ctx, cpu); | |
398 | double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); | |
399 | ||
400 | if (total_slots) | |
401 | retiring = ret_slots / total_slots; | |
402 | return retiring; | |
403 | } | |
404 | ||
405 | static double td_fe_bound(int ctx, int cpu) | |
406 | { | |
407 | double fe_bound = 0; | |
408 | double total_slots = td_total_slots(ctx, cpu); | |
409 | double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); | |
410 | ||
411 | if (total_slots) | |
412 | fe_bound = fetch_bub / total_slots; | |
413 | return fe_bound; | |
414 | } | |
415 | ||
/*
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * If the three other fractions add up to exactly zero the result is 0
 * rather than 1.0; otherwise it is passed through sanitize_val().
 */
static double td_be_bound(int ctx, int cpu)
{
	double accounted = (td_fe_bound(ctx, cpu) +
			    td_bad_spec(ctx, cpu) +
			    td_retiring(ctx, cpu));

	return accounted == 0 ? 0 : sanitize_val(1.0 - accounted);
}
425 | ||
140aeadc AK |
426 | void perf_stat__print_shadow_stats(struct perf_evsel *evsel, |
427 | double avg, int cpu, | |
428 | struct perf_stat_output_ctx *out) | |
f87027b9 | 429 | { |
140aeadc AK |
430 | void *ctxp = out->ctx; |
431 | print_metric_t print_metric = out->print_metric; | |
f87027b9 | 432 | double total, ratio = 0.0, total2; |
239bd47f | 433 | const char *color = NULL; |
f87027b9 JO |
434 | int ctx = evsel_context(evsel); |
435 | ||
436 | if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { | |
437 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
438 | if (total) { | |
439 | ratio = avg / total; | |
140aeadc AK |
440 | print_metric(ctxp, NULL, "%7.2f ", |
441 | "insn per cycle", ratio); | |
f87027b9 | 442 | } else { |
140aeadc | 443 | print_metric(ctxp, NULL, NULL, "insn per cycle", 0); |
f87027b9 JO |
444 | } |
445 | total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); | |
446 | total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); | |
447 | ||
448 | if (total && avg) { | |
92a61f64 | 449 | out->new_line(ctxp); |
f87027b9 | 450 | ratio = total / avg; |
140aeadc AK |
451 | print_metric(ctxp, NULL, "%7.2f ", |
452 | "stalled cycles per insn", | |
453 | ratio); | |
fb4605ba | 454 | } else if (have_frontend_stalled) { |
140aeadc AK |
455 | print_metric(ctxp, NULL, NULL, |
456 | "stalled cycles per insn", 0); | |
f87027b9 | 457 | } |
140aeadc AK |
458 | } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { |
459 | if (runtime_branches_stats[ctx][cpu].n != 0) | |
460 | print_branch_misses(cpu, evsel, avg, out); | |
461 | else | |
462 | print_metric(ctxp, NULL, NULL, "of all branches", 0); | |
f87027b9 JO |
463 | } else if ( |
464 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
465 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | | |
466 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
467 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
468 | if (runtime_l1_dcache_stats[ctx][cpu].n != 0) | |
469 | print_l1_dcache_misses(cpu, evsel, avg, out); | |
470 | else | |
471 | print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); | |
f87027b9 JO |
472 | } else if ( |
473 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
474 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | | |
475 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
476 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
477 | if (runtime_l1_icache_stats[ctx][cpu].n != 0) | |
478 | print_l1_icache_misses(cpu, evsel, avg, out); | |
479 | else | |
480 | print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); | |
f87027b9 JO |
481 | } else if ( |
482 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
483 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | | |
484 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
485 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
486 | if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) | |
487 | print_dtlb_cache_misses(cpu, evsel, avg, out); | |
488 | else | |
489 | print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); | |
f87027b9 JO |
490 | } else if ( |
491 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
492 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | | |
493 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
494 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
495 | if (runtime_itlb_cache_stats[ctx][cpu].n != 0) | |
496 | print_itlb_cache_misses(cpu, evsel, avg, out); | |
497 | else | |
498 | print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); | |
f87027b9 JO |
499 | } else if ( |
500 | evsel->attr.type == PERF_TYPE_HW_CACHE && | |
501 | evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | | |
502 | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | | |
140aeadc AK |
503 | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { |
504 | if (runtime_ll_cache_stats[ctx][cpu].n != 0) | |
505 | print_ll_cache_misses(cpu, evsel, avg, out); | |
506 | else | |
507 | print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); | |
508 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { | |
f87027b9 JO |
509 | total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); |
510 | ||
511 | if (total) | |
512 | ratio = avg * 100 / total; | |
513 | ||
140aeadc AK |
514 | if (runtime_cacherefs_stats[ctx][cpu].n != 0) |
515 | print_metric(ctxp, NULL, "%8.3f %%", | |
516 | "of all cache refs", ratio); | |
517 | else | |
518 | print_metric(ctxp, NULL, NULL, "of all cache refs", 0); | |
f87027b9 | 519 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { |
140aeadc | 520 | print_stalled_cycles_frontend(cpu, evsel, avg, out); |
f87027b9 | 521 | } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { |
140aeadc | 522 | print_stalled_cycles_backend(cpu, evsel, avg, out); |
f87027b9 JO |
523 | } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { |
524 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
525 | ||
526 | if (total) { | |
527 | ratio = avg / total; | |
140aeadc | 528 | print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); |
f87027b9 | 529 | } else { |
140aeadc | 530 | print_metric(ctxp, NULL, NULL, "Ghz", 0); |
f87027b9 JO |
531 | } |
532 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { | |
533 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
534 | if (total) | |
140aeadc AK |
535 | print_metric(ctxp, NULL, |
536 | "%7.2f%%", "transactional cycles", | |
537 | 100.0 * (avg / total)); | |
538 | else | |
539 | print_metric(ctxp, NULL, NULL, "transactional cycles", | |
540 | 0); | |
f87027b9 JO |
541 | } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { |
542 | total = avg_stats(&runtime_cycles_stats[ctx][cpu]); | |
543 | total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | |
544 | if (total2 < avg) | |
545 | total2 = avg; | |
546 | if (total) | |
140aeadc | 547 | print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", |
f87027b9 | 548 | 100.0 * ((total2-avg) / total)); |
140aeadc AK |
549 | else |
550 | print_metric(ctxp, NULL, NULL, "aborted cycles", 0); | |
551 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { | |
f87027b9 JO |
552 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
553 | ||
54976285 | 554 | if (avg) |
f87027b9 JO |
555 | ratio = total / avg; |
556 | ||
140aeadc AK |
557 | if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) |
558 | print_metric(ctxp, NULL, "%8.0f", | |
559 | "cycles / transaction", ratio); | |
560 | else | |
561 | print_metric(ctxp, NULL, NULL, "cycles / transaction", | |
562 | 0); | |
563 | } else if (perf_stat_evsel__is(evsel, ELISION_START)) { | |
f87027b9 JO |
564 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
565 | ||
54976285 | 566 | if (avg) |
f87027b9 JO |
567 | ratio = total / avg; |
568 | ||
140aeadc | 569 | print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); |
daf4f478 NK |
570 | } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || |
571 | perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { | |
4579ecc8 | 572 | if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) |
140aeadc AK |
573 | print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", |
574 | avg / ratio); | |
4579ecc8 | 575 | else |
140aeadc | 576 | print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); |
239bd47f AK |
577 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { |
578 | double fe_bound = td_fe_bound(ctx, cpu); | |
579 | ||
580 | if (fe_bound > 0.2) | |
581 | color = PERF_COLOR_RED; | |
582 | print_metric(ctxp, color, "%8.1f%%", "frontend bound", | |
583 | fe_bound * 100.); | |
584 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { | |
585 | double retiring = td_retiring(ctx, cpu); | |
586 | ||
587 | if (retiring > 0.7) | |
588 | color = PERF_COLOR_GREEN; | |
589 | print_metric(ctxp, color, "%8.1f%%", "retiring", | |
590 | retiring * 100.); | |
591 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { | |
592 | double bad_spec = td_bad_spec(ctx, cpu); | |
593 | ||
594 | if (bad_spec > 0.1) | |
595 | color = PERF_COLOR_RED; | |
596 | print_metric(ctxp, color, "%8.1f%%", "bad speculation", | |
597 | bad_spec * 100.); | |
598 | } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { | |
599 | double be_bound = td_be_bound(ctx, cpu); | |
600 | const char *name = "backend bound"; | |
601 | static int have_recovery_bubbles = -1; | |
602 | ||
603 | /* In case the CPU does not support topdown-recovery-bubbles */ | |
604 | if (have_recovery_bubbles < 0) | |
605 | have_recovery_bubbles = pmu_have_event("cpu", | |
606 | "topdown-recovery-bubbles"); | |
607 | if (!have_recovery_bubbles) | |
608 | name = "backend bound/bad spec"; | |
609 | ||
610 | if (be_bound > 0.2) | |
611 | color = PERF_COLOR_RED; | |
612 | if (td_total_slots(ctx, cpu) > 0) | |
613 | print_metric(ctxp, color, "%8.1f%%", name, | |
614 | be_bound * 100.); | |
615 | else | |
616 | print_metric(ctxp, NULL, NULL, name, 0); | |
f87027b9 JO |
617 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
618 | char unit = 'M'; | |
140aeadc | 619 | char unit_buf[10]; |
f87027b9 JO |
620 | |
621 | total = avg_stats(&runtime_nsecs_stats[cpu]); | |
622 | ||
623 | if (total) | |
624 | ratio = 1000.0 * avg / total; | |
625 | if (ratio < 0.001) { | |
626 | ratio *= 1000; | |
627 | unit = 'K'; | |
628 | } | |
140aeadc AK |
629 | snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); |
630 | print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); | |
f87027b9 | 631 | } else { |
140aeadc | 632 | print_metric(ctxp, NULL, NULL, NULL, 0); |
f87027b9 JO |
633 | } |
634 | } |