tools/power/turbostat: Add skeleton support for table driven feature enumeration
[linux-block.git] / tools / power / x86 / turbostat / turbostat.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * turbostat -- show CPU frequency and C-state residency
4  * on modern Intel and AMD processors.
5  *
6  * Copyright (c) 2023 Intel Corporation.
7  * Len Brown <len.brown@intel.com>
8  */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12 #include INTEL_FAMILY_HEADER
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <err.h>
16 #include <unistd.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <sys/stat.h>
20 #include <sys/select.h>
21 #include <sys/resource.h>
22 #include <fcntl.h>
23 #include <signal.h>
24 #include <sys/time.h>
25 #include <stdlib.h>
26 #include <getopt.h>
27 #include <dirent.h>
28 #include <string.h>
29 #include <ctype.h>
30 #include <sched.h>
31 #include <time.h>
32 #include <cpuid.h>
33 #include <sys/capability.h>
34 #include <errno.h>
35 #include <math.h>
36 #include <linux/perf_event.h>
37 #include <asm/unistd.h>
38 #include <stdbool.h>
39
/* Evaluate x and discard the result by casting it to void. */
#define UNUSED(x) (void)(x)
41
42 /*
43  * This list matches the column headers, except
44  * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
45  * 2. Core and CPU are moved to the end, we can't have strings that contain them
46  *    matching on them for --show and --hide.
47  */
48
49 /*
50  * buffer size used by sscanf() for added column names
51  * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
52  */
#define NAME_BYTES 20
#define PATH_BYTES 128

/* Classification enums stored in (or used alongside) struct msr_counter. */
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };

/*
 * Description of one counter column.
 * Instances can be chained into a singly linked list through 'next'.
 */
struct msr_counter {
        unsigned int msr_num;
        char name[NAME_BYTES];          /* column name, NUL-terminated */
        char path[PATH_BYTES];
        unsigned int width;
        enum counter_type type;
        enum counter_format format;
        struct msr_counter *next;
        unsigned int flags;
/*
 * Bits for 'flags'.  Every flag needs its own bit: SYSFS_PERCPU used to
 * share bit 1 with FLAGS_SHOW, which made the two indistinguishable.
 */
#define FLAGS_HIDE      (1 << 0)
#define FLAGS_SHOW      (1 << 1)
#define SYSFS_PERCPU    (1 << 2)
};
73
74 struct msr_counter bic[] = {
75         { 0x0, "usec", "", 0, 0, 0, NULL, 0 },
76         { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 },
77         { 0x0, "Package", "", 0, 0, 0, NULL, 0 },
78         { 0x0, "Node", "", 0, 0, 0, NULL, 0 },
79         { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 },
80         { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 },
81         { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 },
82         { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 },
83         { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 },
84         { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 },
85         { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 },
86         { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 },
87         { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 },
88         { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 },
89         { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 },
90         { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 },
91         { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 },
92         { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 },
93         { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 },
94         { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 },
95         { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 },
96         { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 },
97         { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 },
98         { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 },
99         { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 },
100         { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 },
101         { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 },
102         { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 },
103         { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 },
104         { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 },
105         { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 },
106         { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 },
107         { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 },
108         { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 },
109         { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 },
110         { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 },
111         { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 },
112         { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 },
113         { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 },
114         { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 },
115         { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 },
116         { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 },
117         { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 },
118         { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 },
119         { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 },
120         { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 },
121         { 0x0, "Core", "", 0, 0, 0, NULL, 0 },
122         { 0x0, "CPU", "", 0, 0, 0, NULL, 0 },
123         { 0x0, "APIC", "", 0, 0, 0, NULL, 0 },
124         { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 },
125         { 0x0, "Die", "", 0, 0, 0, NULL, 0 },
126         { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 },
127         { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
128         { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
129         { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
130 };
131
/* Number of entries in bic[]. */
#define MAX_BIC (sizeof(bic) / sizeof(bic[0]))

/*
 * One mask bit per built-in counter.  Bit n corresponds to bic[n],
 * so this list must be kept in the same order as the bic[] array.
 */
#define BIC_BIT(n)      (1ULL << (n))

#define BIC_USEC        BIC_BIT(0)
#define BIC_TOD         BIC_BIT(1)
#define BIC_Package     BIC_BIT(2)
#define BIC_Node        BIC_BIT(3)
#define BIC_Avg_MHz     BIC_BIT(4)
#define BIC_Busy        BIC_BIT(5)
#define BIC_Bzy_MHz     BIC_BIT(6)
#define BIC_TSC_MHz     BIC_BIT(7)
#define BIC_IRQ         BIC_BIT(8)
#define BIC_SMI         BIC_BIT(9)
#define BIC_sysfs       BIC_BIT(10)
#define BIC_CPU_c1      BIC_BIT(11)
#define BIC_CPU_c3      BIC_BIT(12)
#define BIC_CPU_c6      BIC_BIT(13)
#define BIC_CPU_c7      BIC_BIT(14)
#define BIC_ThreadC     BIC_BIT(15)
#define BIC_CoreTmp     BIC_BIT(16)
#define BIC_CoreCnt     BIC_BIT(17)
#define BIC_PkgTmp      BIC_BIT(18)
#define BIC_GFX_rc6     BIC_BIT(19)
#define BIC_GFXMHz      BIC_BIT(20)
#define BIC_Pkgpc2      BIC_BIT(21)
#define BIC_Pkgpc3      BIC_BIT(22)
#define BIC_Pkgpc6      BIC_BIT(23)
#define BIC_Pkgpc7      BIC_BIT(24)
#define BIC_Pkgpc8      BIC_BIT(25)
#define BIC_Pkgpc9      BIC_BIT(26)
#define BIC_Pkgpc10     BIC_BIT(27)
#define BIC_CPU_LPI     BIC_BIT(28)
#define BIC_SYS_LPI     BIC_BIT(29)
#define BIC_PkgWatt     BIC_BIT(30)
#define BIC_CorWatt     BIC_BIT(31)
#define BIC_GFXWatt     BIC_BIT(32)
#define BIC_PkgCnt      BIC_BIT(33)
#define BIC_RAMWatt     BIC_BIT(34)
#define BIC_PKG__       BIC_BIT(35)
#define BIC_RAM__       BIC_BIT(36)
#define BIC_Pkg_J       BIC_BIT(37)
#define BIC_Cor_J       BIC_BIT(38)
#define BIC_GFX_J       BIC_BIT(39)
#define BIC_RAM_J       BIC_BIT(40)
#define BIC_Mod_c6      BIC_BIT(41)
#define BIC_Totl_c0     BIC_BIT(42)
#define BIC_Any_c0      BIC_BIT(43)
#define BIC_GFX_c0      BIC_BIT(44)
#define BIC_CPUGFX      BIC_BIT(45)
#define BIC_Core        BIC_BIT(46)
#define BIC_CPU         BIC_BIT(47)
#define BIC_APIC        BIC_BIT(48)
#define BIC_X2APIC      BIC_BIT(49)
#define BIC_Die         BIC_BIT(50)
#define BIC_GFXACTMHz   BIC_BIT(51)
#define BIC_IPC         BIC_BIT(52)
#define BIC_CORE_THROT_CNT      BIC_BIT(53)
#define BIC_UNCORE_MHZ          BIC_BIT(54)
188
/* Named groups of built-in counters, each an OR of BIC_* bits. */
#define BIC_TOPOLOGY \
        (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | \
         BIC_Core | BIC_CPU | BIC_Die)

#define BIC_THERMAL_PWR \
        (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | \
         BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)

#define BIC_FREQUENCY \
        (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | \
         BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)

#define BIC_IDLE \
        (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | \
         BIC_GFX_rc6 | \
         BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | \
         BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | \
         BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | \
         BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)

#define BIC_OTHER \
        (BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)

/* Counters that are masked out of the initial value of bic_enabled. */
#define BIC_DISABLED_BY_DEFAULT \
        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
196
/*
 * bic_enabled: counters that are switched on; starts as every bit except
 *              BIC_DISABLED_BY_DEFAULT.
 * bic_present: counters marked as present; starts with the five bits listed here.
 * DO_BIC() tests a counter against both masks.
 */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
199
/*
 * Helpers for the bic_enabled / bic_present bit masks.
 *
 * Arguments are parenthesized so that an expression such as
 * (BIC_A | BIC_B) is applied as a whole; without the parentheses
 * BIC_NOT_PRESENT(A | B) would expand to "bic_present &= ~A | B".
 *
 * DO_BIC          non-zero if the counter is both enabled and present
 * DO_BIC_READ     non-zero if the counter is present
 * ENABLE_BIC      set bits in bic_enabled
 * BIC_PRESENT     set bits in bic_present
 * BIC_NOT_PRESENT clear bits in bic_present
 * BIC_IS_ENABLED  non-zero if the counter is enabled
 */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & (COUNTER_NAME))
#define DO_BIC_READ(COUNTER_NAME) (bic_present & (COUNTER_NAME))
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= (COUNTER_NAME))
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= (COUNTER_BIT))
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~(COUNTER_BIT))
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & (COUNTER_BIT))
206
/*
 * Program-wide state.
 * Objects without an initializer have static storage and therefore start at zero/NULL.
 */
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;                 /* indexed by cpu; 0 means "not opened yet" (see get_msr_fd()) */
int *fd_instr_count_percpu;     /* indexed by cpu; 0 means "not opened yet" (see get_instr_count_fd()) */
struct timeval interval_tv = { 5, 0 };  /* initialized to 5 seconds */
struct timespec interval_ts = { 5, 0 }; /* initialized to 5 seconds */

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int do_snb_cstates;
unsigned int do_knl_cstates;
unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int do_irtl_snb;
unsigned int do_irtl_hsw;
unsigned int units = 1000000;   /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
unsigned int do_rapl;           /* mask of RAPL_* bits (tested in idx_valid()) */
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
int tcc_offset_bits;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
unsigned int dis_cstate_prewake;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp;           /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;    /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;   /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;       /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;       /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
283
284 /* Model specific support Start */
285
286 /* List of features that may diverge among different platforms */
/*
 * Per-platform feature description.
 * Intentionally empty for now (skeleton); note that a struct without
 * members is a GNU C extension, not ISO C.
 */
struct platform_features {
};

/* One row of the model -> features lookup table (see turbostat_pdata[]). */
struct platform_data {
        unsigned int model;
        const struct platform_features *features;
};
294
/*
 * One feature table per platform group.  All of them are still empty;
 * they exist so that turbostat_pdata[] and probe_platform_features()
 * have distinct objects to point at.
 */
static const struct platform_features nhm_features = {
};

static const struct platform_features nhx_features = {
};

static const struct platform_features snb_features = {
};

static const struct platform_features snx_features = {
};

static const struct platform_features ivb_features = {
};

static const struct platform_features ivx_features = {
};

static const struct platform_features hsw_features = {
};

static const struct platform_features hsx_features = {
};

static const struct platform_features hswl_features = {
};

static const struct platform_features hswg_features = {
};

static const struct platform_features bdw_features = {
};

static const struct platform_features bdwg_features = {
};

static const struct platform_features bdx_features = {
};

static const struct platform_features skl_features = {
};

static const struct platform_features cnl_features = {
};

static const struct platform_features skx_features = {
};

static const struct platform_features icx_features = {
};

static const struct platform_features spr_features = {
};

static const struct platform_features slv_features = {
};

static const struct platform_features slvd_features = {
};

static const struct platform_features amt_features = {
};

static const struct platform_features gmt_features = {
};

static const struct platform_features gmtd_features = {
};

static const struct platform_features gmtp_features = {
};

static const struct platform_features tmt_features = {
};

static const struct platform_features tmtd_features = {
};

static const struct platform_features knl_features = {
};

/* Used when no table entry matches (see probe_platform_features()). */
static const struct platform_features default_features = {
};

/* Used when authentic_amd or hygon_genuine is set (see probe_platform_features()). */
static const struct platform_features amd_features = {
};
381
/*
 * Lookup table from CPU model number to feature table.
 * probe_platform_features() scans it linearly, and only for Intel family 6.
 * The entry with a NULL 'features' pointer terminates the table and must stay last.
 */
static const struct platform_data turbostat_pdata[] = {
        { INTEL_FAM6_NEHALEM, &nhm_features },
        { INTEL_FAM6_NEHALEM_G, &nhm_features },
        { INTEL_FAM6_NEHALEM_EP, &nhm_features },
        { INTEL_FAM6_NEHALEM_EX, &nhx_features },
        { INTEL_FAM6_WESTMERE, &nhm_features },
        { INTEL_FAM6_WESTMERE_EP, &nhm_features },
        { INTEL_FAM6_WESTMERE_EX, &nhx_features },
        { INTEL_FAM6_SANDYBRIDGE, &snb_features },
        { INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
        { INTEL_FAM6_IVYBRIDGE, &ivb_features },
        { INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
        { INTEL_FAM6_HASWELL, &hsw_features },
        { INTEL_FAM6_HASWELL_X, &hsx_features },
        { INTEL_FAM6_HASWELL_L, &hswl_features },
        { INTEL_FAM6_HASWELL_G, &hswg_features },
        { INTEL_FAM6_BROADWELL, &bdw_features },
        { INTEL_FAM6_BROADWELL_G, &bdwg_features },
        { INTEL_FAM6_BROADWELL_X, &bdx_features },
        { INTEL_FAM6_BROADWELL_D, &bdx_features },
        { INTEL_FAM6_SKYLAKE_L, &skl_features },
        { INTEL_FAM6_SKYLAKE, &skl_features },
        { INTEL_FAM6_SKYLAKE_X, &skx_features },
        { INTEL_FAM6_KABYLAKE_L, &skl_features },
        { INTEL_FAM6_KABYLAKE, &skl_features },
        { INTEL_FAM6_COMETLAKE, &skl_features },
        { INTEL_FAM6_COMETLAKE_L, &skl_features },
        { INTEL_FAM6_CANNONLAKE_L, &cnl_features },
        { INTEL_FAM6_ICELAKE_X, &icx_features },
        { INTEL_FAM6_ICELAKE_D, &icx_features },
        { INTEL_FAM6_ICELAKE_L, &cnl_features },
        { INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
        { INTEL_FAM6_ROCKETLAKE, &cnl_features },
        { INTEL_FAM6_TIGERLAKE_L, &cnl_features },
        { INTEL_FAM6_TIGERLAKE, &cnl_features },
        { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
        { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
        { INTEL_FAM6_LAKEFIELD, &cnl_features },
        { INTEL_FAM6_ALDERLAKE, &cnl_features },
        { INTEL_FAM6_ALDERLAKE_L, &cnl_features },
        { INTEL_FAM6_RAPTORLAKE, &cnl_features },
        { INTEL_FAM6_RAPTORLAKE_P, &cnl_features },
        { INTEL_FAM6_RAPTORLAKE_S, &cnl_features },
        { INTEL_FAM6_METEORLAKE, &cnl_features },
        { INTEL_FAM6_METEORLAKE_L, &cnl_features },
        { INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
        { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
        { INTEL_FAM6_ATOM_AIRMONT, &amt_features },
        { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
        { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
        { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
        { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
        { INTEL_FAM6_ATOM_TREMONT, &tmt_features },
        { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
        { INTEL_FAM6_ATOM_GRACEMONT, &cnl_features },
        { INTEL_FAM6_XEON_PHI_KNL, &knl_features },
        { INTEL_FAM6_XEON_PHI_KNM, &knl_features },
        /*
         * Missing support for
         * INTEL_FAM6_ICELAKE
         * INTEL_FAM6_ATOM_SILVERMONT_MID
         * INTEL_FAM6_ATOM_AIRMONT_MID
         * INTEL_FAM6_ATOM_AIRMONT_NP
         */
        { 0, NULL },    /* terminator */
};
448
449 static const struct platform_features *platform;
450
451 void probe_platform_features(unsigned int family, unsigned int model)
452 {
453         int i;
454
455         if (authentic_amd || hygon_genuine) {
456                 platform = &amd_features;
457                 return;
458         }
459
460         platform = &default_features;
461
462         if (!genuine_intel || family != 6)
463                 return;
464
465         for (i = 0; turbostat_pdata[i].features; i++) {
466                 if (turbostat_pdata[i].model == model) {
467                         platform = turbostat_pdata[i].features;
468                         return;
469                 }
470         }
471 }
472
473 /* Model specific support End */
474
/*
 * Bits of the do_rapl mask.  The comment under each bit lists the
 * MSR(s) associated with it.
 */
#define RAPL_PKG                (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
                                        /* 0x611 MSR_PKG_ENERGY_STATUS */
#define RAPL_PKG_PERF_STATUS    (1 << 1)
                                        /* 0x613 MSR_PKG_PERF_STATUS */
#define RAPL_PKG_POWER_INFO     (1 << 2)
                                        /* 0x614 MSR_PKG_POWER_INFO */

#define RAPL_DRAM               (1 << 3)
                                        /* 0x618 MSR_DRAM_POWER_LIMIT */
                                        /* 0x619 MSR_DRAM_ENERGY_STATUS */
#define RAPL_DRAM_PERF_STATUS   (1 << 4)
                                        /* 0x61b MSR_DRAM_PERF_STATUS */
#define RAPL_DRAM_POWER_INFO    (1 << 5)
                                        /* 0x61c MSR_DRAM_POWER_INFO */

#define RAPL_CORES_POWER_LIMIT  (1 << 6)
                                        /* 0x638 MSR_PP0_POWER_LIMIT */
#define RAPL_CORE_POLICY        (1 << 7)
                                        /* 0x63a MSR_PP0_POLICY */

#define RAPL_GFX                (1 << 8)
                                        /* 0x640 MSR_PP1_POWER_LIMIT */
                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
                                        /* 0x642 MSR_PP1_POLICY */

#define RAPL_CORES_ENERGY_STATUS        (1 << 9)
                                        /* 0x639 MSR_PP0_ENERGY_STATUS */
#define RAPL_PER_CORE_ENERGY    (1 << 10)
                                        /* Indicates cores energy collection is per-core,
                                         * not per-package. */
#define RAPL_AMD_F17H           (1 << 11)
                                        /* 0xc0010299 MSR_RAPL_PWR_UNIT */
                                        /* 0xc001029a MSR_CORE_ENERGY_STAT */
                                        /* 0xc001029b MSR_PKG_ENERGY_STAT */
/* Convenience mask: both core-related RAPL bits. */
#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
#define TJMAX_DEFAULT   100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT       0xc0010299
#define MSR_CORE_ENERGY_STAT    0xc001029a
#define MSR_PKG_ENERGY_STAT     0xc001029b
517
/* Larger of a and b.  The selected argument is evaluated twice: avoid side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS      1024    /* need to use before probe... */
/* Dynamically sized CPU sets and their sizes in bytes, as used by the CPU_*_S() macros. */
cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
/* Capacity of the counter[] arrays in core_data/pkg_data and thread_data respectively. */
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
529
/*
 * Per-thread (logical CPU) counter values.
 * Two arrays of this type exist, thread_even and thread_odd.
 */
struct thread_data {
        struct timeval tv_begin;
        struct timeval tv_end;
        struct timeval tv_delta;
        unsigned long long tsc;
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
        unsigned long long instr_count;
        unsigned long long irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;            /* checked against cpu_present_set in for_all_cpus() */
        unsigned int apic_id;
        unsigned int x2apic_id;
        unsigned int flags;             /* CPU_IS_FIRST_* bits below */
        bool is_atom;
#define CPU_IS_FIRST_THREAD_IN_CORE     0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE    0x4
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
550
/*
 * Per-core counter values.
 * Two arrays of this type exist, core_even and core_odd.
 */
struct core_data {
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
        unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
        unsigned long long core_throt_cnt;
        unsigned long long counter[MAX_ADDED_COUNTERS];
} *core_even, *core_odd;
562
/*
 * Per-package counter values.
 * Two arrays of this type exist, package_even and package_odd.
 */
struct pkg_data {
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
        unsigned long long pc7;
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
        unsigned long long cpu_lpi;
        unsigned long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
        unsigned long long pkg_both_core_gfxe_c0;
        long long gfx_rc6_ms;           /* signed, unlike the other counters */
        unsigned int gfx_mhz;
        unsigned int gfx_act_mhz;
        unsigned int package_id;
        unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
        unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
        unsigned long long energy_cores;        /* MSR_PP0_ENERGY_STATUS */
        unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
        unsigned long long rapl_pkg_perf_status;        /* MSR_PKG_PERF_STATUS */
        unsigned long long rapl_dram_perf_status;       /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
        unsigned int uncore_mhz;
        unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
591
/* Argument triples (thread, core, package base) for the two counter sets. */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/*
 * Address of one element in the flat per-thread / per-core / per-package
 * arrays.  The arrays are laid out package-major, then node, then core,
 * then thread, using the dimensions stored in 'topo'.
 *
 * Every macro argument is parenthesized so that expressions can be passed
 * safely (GET_PKG previously used its arguments bare).
 */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)          \
        ((thread_base) +                                                      \
         ((pkg_no) *                                                          \
          topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
         ((node_no) * topo.cores_per_node * topo.threads_per_core) +          \
         ((core_no) * topo.threads_per_core) +                                \
         (thread_no))

#define GET_CORE(core_base, core_no, node_no, pkg_no)                   \
        ((core_base) +                                                  \
         ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +       \
         ((node_no) * topo.cores_per_node) +                            \
         (core_no))

#define GET_PKG(pkg_base, pkg_no) ((pkg_base) + (pkg_no))
610
/*
 * An accumulated MSR sum tracks a monotonically increasing MSR.
 * The register is sampled periodically and the deltas are added up,
 * so the sum keeps growing regardless of the register's bit width.
 */
/*
 * Indices into msr_sum_array.entries[].
 * IDX_COUNT is the number of entries and must stay last.
 */
enum {
        IDX_PKG_ENERGY,
        IDX_DRAM_ENERGY,
        IDX_PP0_ENERGY,
        IDX_PP1_ENERGY,
        IDX_PKG_PERF,
        IDX_DRAM_PERF,
        IDX_COUNT,
};
625
/* Forward declaration; the definition is not in this part of the file. */
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

/* Accumulation state for the MSRs listed in the IDX_* enum, one slot per index. */
struct msr_sum_array {
        /* get_msr_sum() = sum + (get_msr() - last) */
        struct {
                /*The accumulated MSR value is updated by the timer */
                unsigned long long sum;
                /*The MSR footprint recorded in last timer */
                unsigned long long last;
        } entries[IDX_COUNT];
};

/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;
640
641 off_t idx_to_offset(int idx)
642 {
643         off_t offset;
644
645         switch (idx) {
646         case IDX_PKG_ENERGY:
647                 if (do_rapl & RAPL_AMD_F17H)
648                         offset = MSR_PKG_ENERGY_STAT;
649                 else
650                         offset = MSR_PKG_ENERGY_STATUS;
651                 break;
652         case IDX_DRAM_ENERGY:
653                 offset = MSR_DRAM_ENERGY_STATUS;
654                 break;
655         case IDX_PP0_ENERGY:
656                 offset = MSR_PP0_ENERGY_STATUS;
657                 break;
658         case IDX_PP1_ENERGY:
659                 offset = MSR_PP1_ENERGY_STATUS;
660                 break;
661         case IDX_PKG_PERF:
662                 offset = MSR_PKG_PERF_STATUS;
663                 break;
664         case IDX_DRAM_PERF:
665                 offset = MSR_DRAM_PERF_STATUS;
666                 break;
667         default:
668                 offset = -1;
669         }
670         return offset;
671 }
672
673 int offset_to_idx(off_t offset)
674 {
675         int idx;
676
677         switch (offset) {
678         case MSR_PKG_ENERGY_STATUS:
679         case MSR_PKG_ENERGY_STAT:
680                 idx = IDX_PKG_ENERGY;
681                 break;
682         case MSR_DRAM_ENERGY_STATUS:
683                 idx = IDX_DRAM_ENERGY;
684                 break;
685         case MSR_PP0_ENERGY_STATUS:
686                 idx = IDX_PP0_ENERGY;
687                 break;
688         case MSR_PP1_ENERGY_STATUS:
689                 idx = IDX_PP1_ENERGY;
690                 break;
691         case MSR_PKG_PERF_STATUS:
692                 idx = IDX_PKG_PERF;
693                 break;
694         case MSR_DRAM_PERF_STATUS:
695                 idx = IDX_DRAM_PERF;
696                 break;
697         default:
698                 idx = -1;
699         }
700         return idx;
701 }
702
703 int idx_valid(int idx)
704 {
705         switch (idx) {
706         case IDX_PKG_ENERGY:
707                 return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
708         case IDX_DRAM_ENERGY:
709                 return do_rapl & RAPL_DRAM;
710         case IDX_PP0_ENERGY:
711                 return do_rapl & RAPL_CORES_ENERGY_STATUS;
712         case IDX_PP1_ENERGY:
713                 return do_rapl & RAPL_GFX;
714         case IDX_PKG_PERF:
715                 return do_rapl & RAPL_PKG_PERF_STATUS;
716         case IDX_DRAM_PERF:
717                 return do_rapl & RAPL_DRAM_PERF_STATUS;
718         default:
719                 return 0;
720         }
721 }
722
/*
 * Counts of added thread/core/package counters together with one
 * struct msr_counter pointer per scope (tp, cp, pp).
 */
struct sys_counters {
        unsigned int added_thread_counters;
        unsigned int added_core_counters;
        unsigned int added_package_counters;
        struct msr_counter *tp;
        struct msr_counter *cp;
        struct msr_counter *pp;
} sys;
731
/* One thread_data, core_data and pkg_data record bundled together; the single instance is 'average'. */
struct system_summary {
        struct thread_data threads;
        struct core_data cores;
        struct pkg_data packages;
} average;
737
/* Topology identifiers of one CPU; 'cpus' points to these records. */
struct cpu_topology {
        int physical_package_id;
        int die_id;
        int logical_cpu_id;
        int physical_node_id;
        int logical_node_id;    /* 0-based count within the package */
        int physical_core_id;
        int thread_id;
        cpu_set_t *put_ids;     /* Processing Unit/Thread IDs */
} *cpus;
748
/*
 * System-wide topology dimensions.
 * nodes_per_pkg, cores_per_node and threads_per_core are the strides used
 * by GET_THREAD()/GET_CORE() and the loop bounds in for_all_cpus().
 */
struct topo_params {
        int num_packages;
        int num_die;
        int num_cpus;
        int num_cores;
        int max_cpu_num;
        int max_node_num;
        int nodes_per_pkg;
        int cores_per_node;
        int threads_per_core;
} topo;
760
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;          /* /proc/interrupts column numbers */
int *irqs_per_cpu;              /* indexed by cpu_num */

void setup_all_buffers(void);

/* Two candidate paths for the system low-power-idle residency file, plus a pointer for the one in use. */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
771
772 int cpu_is_not_present(int cpu)
773 {
774         return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
775 }
776
777 /*
778  * run func(thread, core, package) in topology order
779  * skip non-present cpus
780  */
781
782 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
783                  struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
784 {
785         int retval, pkg_no, core_no, thread_no, node_no;
786
787         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
788                 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
789                         for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
790                                 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
791                                         struct thread_data *t;
792                                         struct core_data *c;
793                                         struct pkg_data *p;
794
795                                         t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
796
797                                         if (cpu_is_not_present(t->cpu_id))
798                                                 continue;
799
800                                         c = GET_CORE(core_base, core_no, node_no, pkg_no);
801                                         p = GET_PKG(pkg_base, pkg_no);
802
803                                         retval = func(t, c, p);
804                                         if (retval)
805                                                 return retval;
806                                 }
807                         }
808                 }
809         }
810         return 0;
811 }
812
813 int cpu_migrate(int cpu)
814 {
815         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
816         CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
817         if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
818                 return -1;
819         else
820                 return 0;
821 }
822
823 int get_msr_fd(int cpu)
824 {
825         char pathname[32];
826         int fd;
827
828         fd = fd_percpu[cpu];
829
830         if (fd)
831                 return fd;
832
833         sprintf(pathname, "/dev/cpu/%d/msr", cpu);
834         fd = open(pathname, O_RDONLY);
835         if (fd < 0)
836                 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
837
838         fd_percpu[cpu] = fd;
839
840         return fd;
841 }
842
843 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
844 {
845         return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
846 }
847
848 static int perf_instr_count_open(int cpu_num)
849 {
850         struct perf_event_attr pea;
851         int fd;
852
853         memset(&pea, 0, sizeof(struct perf_event_attr));
854         pea.type = PERF_TYPE_HARDWARE;
855         pea.size = sizeof(struct perf_event_attr);
856         pea.config = PERF_COUNT_HW_INSTRUCTIONS;
857
858         /* counter for cpu_num, including user + kernel and all processes */
859         fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
860         if (fd == -1) {
861                 warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
862                 BIC_NOT_PRESENT(BIC_IPC);
863         }
864
865         return fd;
866 }
867
868 int get_instr_count_fd(int cpu)
869 {
870         if (fd_instr_count_percpu[cpu])
871                 return fd_instr_count_percpu[cpu];
872
873         fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
874
875         return fd_instr_count_percpu[cpu];
876 }
877
/*
 * get_msr
 * Read the MSR at "offset" on "cpu" into *msr.
 *
 * Returns 0 on success.  A short or failed read terminates the
 * program via err().
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
        int msr_fd = get_msr_fd(cpu);
        ssize_t nread = pread(msr_fd, msr, sizeof(*msr), offset);

        /* anything other than a complete read of *msr is fatal */
        if (nread != sizeof(*msr))
                err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

        return 0;
}
889
/* capacity of each deferred-name table below */
#define MAX_DEFERRED 16

/* names that bic_lookup() did not recognize in SHOW_LIST mode, and how many */
char *deferred_add_names[MAX_DEFERRED];
int deferred_add_index;

/* names that bic_lookup() did not recognize in HIDE_LIST mode, and how many */
char *deferred_skip_names[MAX_DEFERRED];
int deferred_skip_index;

/*
 * SHOW_LIST - show this list of counters, hide the rest
 * HIDE_LIST - hide this list of counters, show the rest [default]
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST };

enum show_hide_mode global_show_hide_mode = HIDE_LIST;
901
902 void help(void)
903 {
904         fprintf(outf,
905                 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
906                 "\n"
907                 "Turbostat forks the specified COMMAND and prints statistics\n"
908                 "when COMMAND completes.\n"
909                 "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
910                 "to print statistics, until interrupted.\n"
911                 "  -a, --add    add a counter\n"
912                 "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
913                 "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
914                 "                 {core | package | j,k,l..m,n-p }\n"
915                 "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
916                 "  -D, --Dump   displays the raw counter values\n"
917                 "  -e, --enable [all | column]\n"
918                 "               shows all or the specified disabled column\n"
919                 "  -H, --hide [column|column,column,...]\n"
920                 "               hide the specified column(s)\n"
921                 "  -i, --interval sec.subsec\n"
922                 "               Override default 5-second measurement interval\n"
923                 "  -J, --Joules displays energy in Joules instead of Watts\n"
924                 "  -l, --list   list column headers only\n"
925                 "  -n, --num_iterations num\n"
926                 "               number of the measurement iterations\n"
927                 "  -N, --header_iterations num\n"
928                 "               print header every num iterations\n"
929                 "  -o, --out file\n"
930                 "               create or truncate \"file\" for all output\n"
931                 "  -q, --quiet  skip decoding system configuration header\n"
932                 "  -s, --show [column|column,column,...]\n"
933                 "               show only the specified column(s)\n"
934                 "  -S, --Summary\n"
935                 "               limits output to 1-line system summary per interval\n"
936                 "  -T, --TCC temperature\n"
937                 "               sets the Thermal Control Circuit temperature in\n"
938                 "                 degrees Celsius\n"
939                 "  -h, --help   print this help message\n"
940                 "  -v, --version        print version information\n" "\n" "For more help, run \"man turbostat\"\n");
941 }
942
943 /*
944  * bic_lookup
945  * for all the strings in comma separate name_list,
946  * set the approprate bit in return value.
947  */
948 unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
949 {
950         unsigned int i;
951         unsigned long long retval = 0;
952
953         while (name_list) {
954                 char *comma;
955
956                 comma = strchr(name_list, ',');
957
958                 if (comma)
959                         *comma = '\0';
960
961                 for (i = 0; i < MAX_BIC; ++i) {
962                         if (!strcmp(name_list, bic[i].name)) {
963                                 retval |= (1ULL << i);
964                                 break;
965                         }
966                         if (!strcmp(name_list, "all")) {
967                                 retval |= ~0;
968                                 break;
969                         } else if (!strcmp(name_list, "topology")) {
970                                 retval |= BIC_TOPOLOGY;
971                                 break;
972                         } else if (!strcmp(name_list, "power")) {
973                                 retval |= BIC_THERMAL_PWR;
974                                 break;
975                         } else if (!strcmp(name_list, "idle")) {
976                                 retval |= BIC_IDLE;
977                                 break;
978                         } else if (!strcmp(name_list, "frequency")) {
979                                 retval |= BIC_FREQUENCY;
980                                 break;
981                         } else if (!strcmp(name_list, "other")) {
982                                 retval |= BIC_OTHER;
983                                 break;
984                         }
985
986                 }
987                 if (i == MAX_BIC) {
988                         if (mode == SHOW_LIST) {
989                                 deferred_add_names[deferred_add_index++] = name_list;
990                                 if (deferred_add_index >= MAX_DEFERRED) {
991                                         fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
992                                                 MAX_DEFERRED, name_list);
993                                         help();
994                                         exit(1);
995                                 }
996                         } else {
997                                 deferred_skip_names[deferred_skip_index++] = name_list;
998                                 if (debug)
999                                         fprintf(stderr, "deferred \"%s\"\n", name_list);
1000                                 if (deferred_skip_index >= MAX_DEFERRED) {
1001                                         fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
1002                                                 MAX_DEFERRED, name_list);
1003                                         help();
1004                                         exit(1);
1005                                 }
1006                         }
1007                 }
1008
1009                 name_list = comma;
1010                 if (name_list)
1011                         name_list++;
1012
1013         }
1014         return retval;
1015 }
1016
1017 void print_header(char *delim)
1018 {
1019         struct msr_counter *mp;
1020         int printed = 0;
1021
1022         if (DO_BIC(BIC_USEC))
1023                 outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
1024         if (DO_BIC(BIC_TOD))
1025                 outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
1026         if (DO_BIC(BIC_Package))
1027                 outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
1028         if (DO_BIC(BIC_Die))
1029                 outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
1030         if (DO_BIC(BIC_Node))
1031                 outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
1032         if (DO_BIC(BIC_Core))
1033                 outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
1034         if (DO_BIC(BIC_CPU))
1035                 outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
1036         if (DO_BIC(BIC_APIC))
1037                 outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
1038         if (DO_BIC(BIC_X2APIC))
1039                 outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
1040         if (DO_BIC(BIC_Avg_MHz))
1041                 outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
1042         if (DO_BIC(BIC_Busy))
1043                 outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
1044         if (DO_BIC(BIC_Bzy_MHz))
1045                 outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
1046         if (DO_BIC(BIC_TSC_MHz))
1047                 outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
1048
1049         if (DO_BIC(BIC_IPC))
1050                 outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
1051
1052         if (DO_BIC(BIC_IRQ)) {
1053                 if (sums_need_wide_columns)
1054                         outp += sprintf(outp, "%s     IRQ", (printed++ ? delim : ""));
1055                 else
1056                         outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
1057         }
1058
1059         if (DO_BIC(BIC_SMI))
1060                 outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
1061
1062         for (mp = sys.tp; mp; mp = mp->next) {
1063
1064                 if (mp->format == FORMAT_RAW) {
1065                         if (mp->width == 64)
1066                                 outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
1067                         else
1068                                 outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
1069                 } else {
1070                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1071                                 outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
1072                         else
1073                                 outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
1074                 }
1075         }
1076
1077         if (DO_BIC(BIC_CPU_c1))
1078                 outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
1079         if (DO_BIC(BIC_CPU_c3))
1080                 outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
1081         if (DO_BIC(BIC_CPU_c6))
1082                 outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
1083         if (DO_BIC(BIC_CPU_c7))
1084                 outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
1085
1086         if (DO_BIC(BIC_Mod_c6))
1087                 outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
1088
1089         if (DO_BIC(BIC_CoreTmp))
1090                 outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
1091
1092         if (DO_BIC(BIC_CORE_THROT_CNT))
1093                 outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
1094
1095         if (do_rapl && !rapl_joules) {
1096                 if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1097                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
1098         } else if (do_rapl && rapl_joules) {
1099                 if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1100                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
1101         }
1102
1103         for (mp = sys.cp; mp; mp = mp->next) {
1104                 if (mp->format == FORMAT_RAW) {
1105                         if (mp->width == 64)
1106                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
1107                         else
1108                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
1109                 } else {
1110                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1111                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
1112                         else
1113                                 outp += sprintf(outp, "%s%s", delim, mp->name);
1114                 }
1115         }
1116
1117         if (DO_BIC(BIC_PkgTmp))
1118                 outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
1119
1120         if (DO_BIC(BIC_GFX_rc6))
1121                 outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
1122
1123         if (DO_BIC(BIC_GFXMHz))
1124                 outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
1125
1126         if (DO_BIC(BIC_GFXACTMHz))
1127                 outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
1128
1129         if (DO_BIC(BIC_Totl_c0))
1130                 outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
1131         if (DO_BIC(BIC_Any_c0))
1132                 outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
1133         if (DO_BIC(BIC_GFX_c0))
1134                 outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
1135         if (DO_BIC(BIC_CPUGFX))
1136                 outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
1137
1138         if (DO_BIC(BIC_Pkgpc2))
1139                 outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
1140         if (DO_BIC(BIC_Pkgpc3))
1141                 outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
1142         if (DO_BIC(BIC_Pkgpc6))
1143                 outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
1144         if (DO_BIC(BIC_Pkgpc7))
1145                 outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
1146         if (DO_BIC(BIC_Pkgpc8))
1147                 outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
1148         if (DO_BIC(BIC_Pkgpc9))
1149                 outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
1150         if (DO_BIC(BIC_Pkgpc10))
1151                 outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
1152         if (DO_BIC(BIC_CPU_LPI))
1153                 outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
1154         if (DO_BIC(BIC_SYS_LPI))
1155                 outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
1156
1157         if (do_rapl && !rapl_joules) {
1158                 if (DO_BIC(BIC_PkgWatt))
1159                         outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
1160                 if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1161                         outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
1162                 if (DO_BIC(BIC_GFXWatt))
1163                         outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
1164                 if (DO_BIC(BIC_RAMWatt))
1165                         outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
1166                 if (DO_BIC(BIC_PKG__))
1167                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
1168                 if (DO_BIC(BIC_RAM__))
1169                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
1170         } else if (do_rapl && rapl_joules) {
1171                 if (DO_BIC(BIC_Pkg_J))
1172                         outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
1173                 if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1174                         outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
1175                 if (DO_BIC(BIC_GFX_J))
1176                         outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
1177                 if (DO_BIC(BIC_RAM_J))
1178                         outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
1179                 if (DO_BIC(BIC_PKG__))
1180                         outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
1181                 if (DO_BIC(BIC_RAM__))
1182                         outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
1183         }
1184         if (DO_BIC(BIC_UNCORE_MHZ))
1185                 outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));
1186
1187         for (mp = sys.pp; mp; mp = mp->next) {
1188                 if (mp->format == FORMAT_RAW) {
1189                         if (mp->width == 64)
1190                                 outp += sprintf(outp, "%s%18.18s", delim, mp->name);
1191                         else
1192                                 outp += sprintf(outp, "%s%10.10s", delim, mp->name);
1193                 } else {
1194                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1195                                 outp += sprintf(outp, "%s%8s", delim, mp->name);
1196                         else
1197                                 outp += sprintf(outp, "%s%s", delim, mp->name);
1198                 }
1199         }
1200
1201         outp += sprintf(outp, "\n");
1202 }
1203
1204 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1205 {
1206         int i;
1207         struct msr_counter *mp;
1208
1209         outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
1210
1211         if (t) {
1212                 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
1213                 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
1214                 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
1215                 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
1216                 outp += sprintf(outp, "c1: %016llX\n", t->c1);
1217
1218                 if (DO_BIC(BIC_IPC))
1219                         outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
1220
1221                 if (DO_BIC(BIC_IRQ))
1222                         outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
1223                 if (DO_BIC(BIC_SMI))
1224                         outp += sprintf(outp, "SMI: %d\n", t->smi_count);
1225
1226                 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1227                         outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
1228                 }
1229         }
1230
1231         if (c) {
1232                 outp += sprintf(outp, "core: %d\n", c->core_id);
1233                 outp += sprintf(outp, "c3: %016llX\n", c->c3);
1234                 outp += sprintf(outp, "c6: %016llX\n", c->c6);
1235                 outp += sprintf(outp, "c7: %016llX\n", c->c7);
1236                 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
1237                 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
1238                 outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
1239
1240                 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1241                         outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
1242                 }
1243                 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
1244         }
1245
1246         if (p) {
1247                 outp += sprintf(outp, "package: %d\n", p->package_id);
1248
1249                 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
1250                 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
1251                 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
1252                 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
1253
1254                 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
1255                 if (DO_BIC(BIC_Pkgpc3))
1256                         outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
1257                 if (DO_BIC(BIC_Pkgpc6))
1258                         outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
1259                 if (DO_BIC(BIC_Pkgpc7))
1260                         outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
1261                 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
1262                 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
1263                 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
1264                 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
1265                 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
1266                 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
1267                 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
1268                 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
1269                 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
1270                 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
1271                 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
1272                 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
1273
1274                 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1275                         outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
1276                 }
1277         }
1278
1279         outp += sprintf(outp, "\n");
1280
1281         return 0;
1282 }
1283
1284 /*
1285  * column formatting convention & formats
1286  */
1287 int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1288 {
1289         double interval_float, tsc;
1290         char *fmt8;
1291         int i;
1292         struct msr_counter *mp;
1293         char *delim = "\t";
1294         int printed = 0;
1295
1296         /* if showing only 1st thread in core and this isn't one, bail out */
1297         if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1298                 return 0;
1299
1300         /* if showing only 1st thread in pkg and this isn't one, bail out */
1301         if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1302                 return 0;
1303
1304         /*if not summary line and --cpu is used */
1305         if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
1306                 return 0;
1307
1308         if (DO_BIC(BIC_USEC)) {
1309                 /* on each row, print how many usec each timestamp took to gather */
1310                 struct timeval tv;
1311
1312                 timersub(&t->tv_end, &t->tv_begin, &tv);
1313                 outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
1314         }
1315
1316         /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
1317         if (DO_BIC(BIC_TOD))
1318                 outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
1319
1320         interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
1321
1322         tsc = t->tsc * tsc_tweak;
1323
1324         /* topo columns, print blanks on 1st (average) line */
1325         if (t == &average.threads) {
1326                 if (DO_BIC(BIC_Package))
1327                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1328                 if (DO_BIC(BIC_Die))
1329                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1330                 if (DO_BIC(BIC_Node))
1331                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1332                 if (DO_BIC(BIC_Core))
1333                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1334                 if (DO_BIC(BIC_CPU))
1335                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1336                 if (DO_BIC(BIC_APIC))
1337                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1338                 if (DO_BIC(BIC_X2APIC))
1339                         outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1340         } else {
1341                 if (DO_BIC(BIC_Package)) {
1342                         if (p)
1343                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
1344                         else
1345                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1346                 }
1347                 if (DO_BIC(BIC_Die)) {
1348                         if (c)
1349                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
1350                         else
1351                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1352                 }
1353                 if (DO_BIC(BIC_Node)) {
1354                         if (t)
1355                                 outp += sprintf(outp, "%s%d",
1356                                                 (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
1357                         else
1358                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1359                 }
1360                 if (DO_BIC(BIC_Core)) {
1361                         if (c)
1362                                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
1363                         else
1364                                 outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
1365                 }
1366                 if (DO_BIC(BIC_CPU))
1367                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
1368                 if (DO_BIC(BIC_APIC))
1369                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
1370                 if (DO_BIC(BIC_X2APIC))
1371                         outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
1372         }
1373
1374         if (DO_BIC(BIC_Avg_MHz))
1375                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
1376
1377         if (DO_BIC(BIC_Busy))
1378                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
1379
1380         if (DO_BIC(BIC_Bzy_MHz)) {
1381                 if (has_base_hz)
1382                         outp +=
1383                             sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
1384                 else
1385                         outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
1386                                         tsc / units * t->aperf / t->mperf / interval_float);
1387         }
1388
1389         if (DO_BIC(BIC_TSC_MHz))
1390                 outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
1391
1392         if (DO_BIC(BIC_IPC))
1393                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
1394
1395         /* IRQ */
1396         if (DO_BIC(BIC_IRQ)) {
1397                 if (sums_need_wide_columns)
1398                         outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
1399                 else
1400                         outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
1401         }
1402
1403         /* SMI */
1404         if (DO_BIC(BIC_SMI))
1405                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
1406
1407         /* Added counters */
1408         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1409                 if (mp->format == FORMAT_RAW) {
1410                         if (mp->width == 32)
1411                                 outp +=
1412                                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
1413                         else
1414                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
1415                 } else if (mp->format == FORMAT_DELTA) {
1416                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1417                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
1418                         else
1419                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
1420                 } else if (mp->format == FORMAT_PERCENT) {
1421                         if (mp->type == COUNTER_USEC)
1422                                 outp +=
1423                                     sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1424                                             t->counter[i] / interval_float / 10000);
1425                         else
1426                                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
1427                 }
1428         }
1429
1430         /* C1 */
1431         if (DO_BIC(BIC_CPU_c1))
1432                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
1433
1434         /* print per-core data only for 1st thread in core */
1435         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1436                 goto done;
1437
1438         if (DO_BIC(BIC_CPU_c3))
1439                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
1440         if (DO_BIC(BIC_CPU_c6))
1441                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
1442         if (DO_BIC(BIC_CPU_c7))
1443                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
1444
1445         /* Mod%c6 */
1446         if (DO_BIC(BIC_Mod_c6))
1447                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
1448
1449         if (DO_BIC(BIC_CoreTmp))
1450                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
1451
1452         /* Core throttle count */
1453         if (DO_BIC(BIC_CORE_THROT_CNT))
1454                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);
1455
1456         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1457                 if (mp->format == FORMAT_RAW) {
1458                         if (mp->width == 32)
1459                                 outp +=
1460                                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
1461                         else
1462                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
1463                 } else if (mp->format == FORMAT_DELTA) {
1464                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1465                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
1466                         else
1467                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
1468                 } else if (mp->format == FORMAT_PERCENT) {
1469                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
1470                 }
1471         }
1472
1473         fmt8 = "%s%.2f";
1474
1475         if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
1476                 outp +=
1477                     sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
1478         if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
1479                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
1480
1481         /* print per-package data only for 1st core in package */
1482         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1483                 goto done;
1484
1485         /* PkgTmp */
1486         if (DO_BIC(BIC_PkgTmp))
1487                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
1488
1489         /* GFXrc6 */
1490         if (DO_BIC(BIC_GFX_rc6)) {
1491                 if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
1492                         outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
1493                 } else {
1494                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
1495                                         p->gfx_rc6_ms / 10.0 / interval_float);
1496                 }
1497         }
1498
1499         /* GFXMHz */
1500         if (DO_BIC(BIC_GFXMHz))
1501                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
1502
1503         /* GFXACTMHz */
1504         if (DO_BIC(BIC_GFXACTMHz))
1505                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
1506
1507         /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
1508         if (DO_BIC(BIC_Totl_c0))
1509                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
1510         if (DO_BIC(BIC_Any_c0))
1511                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
1512         if (DO_BIC(BIC_GFX_c0))
1513                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
1514         if (DO_BIC(BIC_CPUGFX))
1515                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
1516
1517         if (DO_BIC(BIC_Pkgpc2))
1518                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
1519         if (DO_BIC(BIC_Pkgpc3))
1520                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
1521         if (DO_BIC(BIC_Pkgpc6))
1522                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
1523         if (DO_BIC(BIC_Pkgpc7))
1524                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
1525         if (DO_BIC(BIC_Pkgpc8))
1526                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
1527         if (DO_BIC(BIC_Pkgpc9))
1528                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
1529         if (DO_BIC(BIC_Pkgpc10))
1530                 outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
1531
1532         if (DO_BIC(BIC_CPU_LPI))
1533                 outp +=
1534                     sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
1535         if (DO_BIC(BIC_SYS_LPI))
1536                 outp +=
1537                     sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
1538
1539         if (DO_BIC(BIC_PkgWatt))
1540                 outp +=
1541                     sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
1542
1543         if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1544                 outp +=
1545                     sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
1546         if (DO_BIC(BIC_GFXWatt))
1547                 outp +=
1548                     sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
1549         if (DO_BIC(BIC_RAMWatt))
1550                 outp +=
1551                     sprintf(outp, fmt8, (printed++ ? delim : ""),
1552                             p->energy_dram * rapl_dram_energy_units / interval_float);
1553         if (DO_BIC(BIC_Pkg_J))
1554                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
1555         if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
1556                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
1557         if (DO_BIC(BIC_GFX_J))
1558                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
1559         if (DO_BIC(BIC_RAM_J))
1560                 outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
1561         if (DO_BIC(BIC_PKG__))
1562                 outp +=
1563                     sprintf(outp, fmt8, (printed++ ? delim : ""),
1564                             100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
1565         if (DO_BIC(BIC_RAM__))
1566                 outp +=
1567                     sprintf(outp, fmt8, (printed++ ? delim : ""),
1568                             100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
1569         /* UncMHz */
1570         if (DO_BIC(BIC_UNCORE_MHZ))
1571                 outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
1572
1573         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1574                 if (mp->format == FORMAT_RAW) {
1575                         if (mp->width == 32)
1576                                 outp +=
1577                                     sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
1578                         else
1579                                 outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
1580                 } else if (mp->format == FORMAT_DELTA) {
1581                         if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
1582                                 outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
1583                         else
1584                                 outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
1585                 } else if (mp->format == FORMAT_PERCENT) {
1586                         outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
1587                 }
1588         }
1589
1590 done:
1591         if (*(outp - 1) != '\n')
1592                 outp += sprintf(outp, "\n");
1593
1594         return 0;
1595 }
1596
1597 void flush_output_stdout(void)
1598 {
1599         FILE *filep;
1600
1601         if (outf == stderr)
1602                 filep = stdout;
1603         else
1604                 filep = outf;
1605
1606         fputs(output_buffer, filep);
1607         fflush(filep);
1608
1609         outp = output_buffer;
1610 }
1611
1612 void flush_output_stderr(void)
1613 {
1614         fputs(output_buffer, outf);
1615         fflush(outf);
1616         outp = output_buffer;
1617 }
1618
1619 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1620 {
1621         static int count;
1622
1623         if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
1624                 print_header("\t");
1625
1626         format_counters(&average.threads, &average.cores, &average.packages);
1627
1628         count++;
1629
1630         if (summary_only)
1631                 return;
1632
1633         for_all_cpus(format_counters, t, c, p);
1634 }
1635
/*
 * DELTA_WRAP32(new, old):  old = (new - old) modulo 2^32
 *
 * Both values are shifted into the upper 32 bits of a 64-bit quantity
 * before subtracting, so that a counter which wrapped past 2^32 between
 * the two samples still yields the correct 32-bit difference.
 *
 * "old" is evaluated twice and must be an lvalue.  The do/while(0)
 * wrapper makes the macro behave as a single statement, so it is safe
 * inside an unbraced if/else.
 */
#define DELTA_WRAP32(new, old)                                          \
        do {                                                            \
                (old) = ((((unsigned long long)(new) << 32) -           \
                          ((unsigned long long)(old) << 32)) >> 32);    \
        } while (0)
1638
1639 int delta_package(struct pkg_data *new, struct pkg_data *old)
1640 {
1641         int i;
1642         struct msr_counter *mp;
1643
1644         if (DO_BIC(BIC_Totl_c0))
1645                 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
1646         if (DO_BIC(BIC_Any_c0))
1647                 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
1648         if (DO_BIC(BIC_GFX_c0))
1649                 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
1650         if (DO_BIC(BIC_CPUGFX))
1651                 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
1652
1653         old->pc2 = new->pc2 - old->pc2;
1654         if (DO_BIC(BIC_Pkgpc3))
1655                 old->pc3 = new->pc3 - old->pc3;
1656         if (DO_BIC(BIC_Pkgpc6))
1657                 old->pc6 = new->pc6 - old->pc6;
1658         if (DO_BIC(BIC_Pkgpc7))
1659                 old->pc7 = new->pc7 - old->pc7;
1660         old->pc8 = new->pc8 - old->pc8;
1661         old->pc9 = new->pc9 - old->pc9;
1662         old->pc10 = new->pc10 - old->pc10;
1663         old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
1664         old->sys_lpi = new->sys_lpi - old->sys_lpi;
1665         old->pkg_temp_c = new->pkg_temp_c;
1666
1667         /* flag an error when rc6 counter resets/wraps */
1668         if (old->gfx_rc6_ms > new->gfx_rc6_ms)
1669                 old->gfx_rc6_ms = -1;
1670         else
1671                 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
1672
1673         old->uncore_mhz = new->uncore_mhz;
1674         old->gfx_mhz = new->gfx_mhz;
1675         old->gfx_act_mhz = new->gfx_act_mhz;
1676
1677         old->energy_pkg = new->energy_pkg - old->energy_pkg;
1678         old->energy_cores = new->energy_cores - old->energy_cores;
1679         old->energy_gfx = new->energy_gfx - old->energy_gfx;
1680         old->energy_dram = new->energy_dram - old->energy_dram;
1681         old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
1682         old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
1683
1684         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1685                 if (mp->format == FORMAT_RAW)
1686                         old->counter[i] = new->counter[i];
1687                 else
1688                         old->counter[i] = new->counter[i] - old->counter[i];
1689         }
1690
1691         return 0;
1692 }
1693
1694 void delta_core(struct core_data *new, struct core_data *old)
1695 {
1696         int i;
1697         struct msr_counter *mp;
1698
1699         old->c3 = new->c3 - old->c3;
1700         old->c6 = new->c6 - old->c6;
1701         old->c7 = new->c7 - old->c7;
1702         old->core_temp_c = new->core_temp_c;
1703         old->core_throt_cnt = new->core_throt_cnt;
1704         old->mc6_us = new->mc6_us - old->mc6_us;
1705
1706         DELTA_WRAP32(new->core_energy, old->core_energy);
1707
1708         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1709                 if (mp->format == FORMAT_RAW)
1710                         old->counter[i] = new->counter[i];
1711                 else
1712                         old->counter[i] = new->counter[i] - old->counter[i];
1713         }
1714 }
1715
1716 int soft_c1_residency_display(int bic)
1717 {
1718         if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
1719                 return 0;
1720
1721         return DO_BIC_READ(bic);
1722 }
1723
1724 /*
1725  * old = new - old
1726  */
1727 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
1728 {
1729         int i;
1730         struct msr_counter *mp;
1731
1732         /* we run cpuid just the 1st time, copy the results */
1733         if (DO_BIC(BIC_APIC))
1734                 new->apic_id = old->apic_id;
1735         if (DO_BIC(BIC_X2APIC))
1736                 new->x2apic_id = old->x2apic_id;
1737
1738         /*
1739          * the timestamps from start of measurement interval are in "old"
1740          * the timestamp from end of measurement interval are in "new"
1741          * over-write old w/ new so we can print end of interval values
1742          */
1743
1744         timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
1745         old->tv_begin = new->tv_begin;
1746         old->tv_end = new->tv_end;
1747
1748         old->tsc = new->tsc - old->tsc;
1749
1750         /* check for TSC < 1 Mcycles over interval */
1751         if (old->tsc < (1000 * 1000))
1752                 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
1753                      "You can disable all c-states by booting with \"idle=poll\"\n"
1754                      "or just the deep ones with \"processor.max_cstate=1\"");
1755
1756         old->c1 = new->c1 - old->c1;
1757
1758         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
1759                 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
1760                         old->aperf = new->aperf - old->aperf;
1761                         old->mperf = new->mperf - old->mperf;
1762                 } else {
1763                         return -1;
1764                 }
1765         }
1766
1767         if (use_c1_residency_msr) {
1768                 /*
1769                  * Some models have a dedicated C1 residency MSR,
1770                  * which should be more accurate than the derivation below.
1771                  */
1772         } else {
1773                 /*
1774                  * As counter collection is not atomic,
1775                  * it is possible for mperf's non-halted cycles + idle states
1776                  * to exceed TSC's all cycles: show c1 = 0% in that case.
1777                  */
1778                 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
1779                         old->c1 = 0;
1780                 else {
1781                         /* normal case, derive c1 */
1782                         old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
1783                             - core_delta->c6 - core_delta->c7;
1784                 }
1785         }
1786
1787         if (old->mperf == 0) {
1788                 if (debug > 1)
1789                         fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
1790                 old->mperf = 1; /* divide by 0 protection */
1791         }
1792
1793         if (DO_BIC(BIC_IPC))
1794                 old->instr_count = new->instr_count - old->instr_count;
1795
1796         if (DO_BIC(BIC_IRQ))
1797                 old->irq_count = new->irq_count - old->irq_count;
1798
1799         if (DO_BIC(BIC_SMI))
1800                 old->smi_count = new->smi_count - old->smi_count;
1801
1802         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1803                 if (mp->format == FORMAT_RAW)
1804                         old->counter[i] = new->counter[i];
1805                 else
1806                         old->counter[i] = new->counter[i] - old->counter[i];
1807         }
1808         return 0;
1809 }
1810
1811 int delta_cpu(struct thread_data *t, struct core_data *c,
1812               struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
1813 {
1814         int retval = 0;
1815
1816         /* calculate core delta only for 1st thread in core */
1817         if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
1818                 delta_core(c, c2);
1819
1820         /* always calculate thread delta */
1821         retval = delta_thread(t, t2, c2);       /* c2 is core delta */
1822         if (retval)
1823                 return retval;
1824
1825         /* calculate package delta only for 1st core in package */
1826         if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
1827                 retval = delta_package(p, p2);
1828
1829         return retval;
1830 }
1831
1832 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1833 {
1834         int i;
1835         struct msr_counter *mp;
1836
1837         t->tv_begin.tv_sec = 0;
1838         t->tv_begin.tv_usec = 0;
1839         t->tv_end.tv_sec = 0;
1840         t->tv_end.tv_usec = 0;
1841         t->tv_delta.tv_sec = 0;
1842         t->tv_delta.tv_usec = 0;
1843
1844         t->tsc = 0;
1845         t->aperf = 0;
1846         t->mperf = 0;
1847         t->c1 = 0;
1848
1849         t->instr_count = 0;
1850
1851         t->irq_count = 0;
1852         t->smi_count = 0;
1853
1854         /* tells format_counters to dump all fields from this set */
1855         t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
1856
1857         c->c3 = 0;
1858         c->c6 = 0;
1859         c->c7 = 0;
1860         c->mc6_us = 0;
1861         c->core_temp_c = 0;
1862         c->core_energy = 0;
1863         c->core_throt_cnt = 0;
1864
1865         p->pkg_wtd_core_c0 = 0;
1866         p->pkg_any_core_c0 = 0;
1867         p->pkg_any_gfxe_c0 = 0;
1868         p->pkg_both_core_gfxe_c0 = 0;
1869
1870         p->pc2 = 0;
1871         if (DO_BIC(BIC_Pkgpc3))
1872                 p->pc3 = 0;
1873         if (DO_BIC(BIC_Pkgpc6))
1874                 p->pc6 = 0;
1875         if (DO_BIC(BIC_Pkgpc7))
1876                 p->pc7 = 0;
1877         p->pc8 = 0;
1878         p->pc9 = 0;
1879         p->pc10 = 0;
1880         p->cpu_lpi = 0;
1881         p->sys_lpi = 0;
1882
1883         p->energy_pkg = 0;
1884         p->energy_dram = 0;
1885         p->energy_cores = 0;
1886         p->energy_gfx = 0;
1887         p->rapl_pkg_perf_status = 0;
1888         p->rapl_dram_perf_status = 0;
1889         p->pkg_temp_c = 0;
1890
1891         p->gfx_rc6_ms = 0;
1892         p->uncore_mhz = 0;
1893         p->gfx_mhz = 0;
1894         p->gfx_act_mhz = 0;
1895         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1896                 t->counter[i] = 0;
1897
1898         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1899                 c->counter[i] = 0;
1900
1901         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1902                 p->counter[i] = 0;
1903 }
1904
1905 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1906 {
1907         int i;
1908         struct msr_counter *mp;
1909
1910         /* copy un-changing apic_id's */
1911         if (DO_BIC(BIC_APIC))
1912                 average.threads.apic_id = t->apic_id;
1913         if (DO_BIC(BIC_X2APIC))
1914                 average.threads.x2apic_id = t->x2apic_id;
1915
1916         /* remember first tv_begin */
1917         if (average.threads.tv_begin.tv_sec == 0)
1918                 average.threads.tv_begin = t->tv_begin;
1919
1920         /* remember last tv_end */
1921         average.threads.tv_end = t->tv_end;
1922
1923         average.threads.tsc += t->tsc;
1924         average.threads.aperf += t->aperf;
1925         average.threads.mperf += t->mperf;
1926         average.threads.c1 += t->c1;
1927
1928         average.threads.instr_count += t->instr_count;
1929
1930         average.threads.irq_count += t->irq_count;
1931         average.threads.smi_count += t->smi_count;
1932
1933         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1934                 if (mp->format == FORMAT_RAW)
1935                         continue;
1936                 average.threads.counter[i] += t->counter[i];
1937         }
1938
1939         /* sum per-core values only for 1st thread in core */
1940         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1941                 return 0;
1942
1943         average.cores.c3 += c->c3;
1944         average.cores.c6 += c->c6;
1945         average.cores.c7 += c->c7;
1946         average.cores.mc6_us += c->mc6_us;
1947
1948         average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
1949         average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
1950
1951         average.cores.core_energy += c->core_energy;
1952
1953         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1954                 if (mp->format == FORMAT_RAW)
1955                         continue;
1956                 average.cores.counter[i] += c->counter[i];
1957         }
1958
1959         /* sum per-pkg values only for 1st core in pkg */
1960         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1961                 return 0;
1962
1963         if (DO_BIC(BIC_Totl_c0))
1964                 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
1965         if (DO_BIC(BIC_Any_c0))
1966                 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
1967         if (DO_BIC(BIC_GFX_c0))
1968                 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
1969         if (DO_BIC(BIC_CPUGFX))
1970                 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
1971
1972         average.packages.pc2 += p->pc2;
1973         if (DO_BIC(BIC_Pkgpc3))
1974                 average.packages.pc3 += p->pc3;
1975         if (DO_BIC(BIC_Pkgpc6))
1976                 average.packages.pc6 += p->pc6;
1977         if (DO_BIC(BIC_Pkgpc7))
1978                 average.packages.pc7 += p->pc7;
1979         average.packages.pc8 += p->pc8;
1980         average.packages.pc9 += p->pc9;
1981         average.packages.pc10 += p->pc10;
1982
1983         average.packages.cpu_lpi = p->cpu_lpi;
1984         average.packages.sys_lpi = p->sys_lpi;
1985
1986         average.packages.energy_pkg += p->energy_pkg;
1987         average.packages.energy_dram += p->energy_dram;
1988         average.packages.energy_cores += p->energy_cores;
1989         average.packages.energy_gfx += p->energy_gfx;
1990
1991         average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
1992         average.packages.uncore_mhz = p->uncore_mhz;
1993         average.packages.gfx_mhz = p->gfx_mhz;
1994         average.packages.gfx_act_mhz = p->gfx_act_mhz;
1995
1996         average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
1997
1998         average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1999         average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
2000
2001         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2002                 if (mp->format == FORMAT_RAW)
2003                         continue;
2004                 average.packages.counter[i] += p->counter[i];
2005         }
2006         return 0;
2007 }
2008
2009 /*
2010  * sum the counters for all cpus in the system
2011  * compute the weighted average
2012  */
2013 void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2014 {
2015         int i;
2016         struct msr_counter *mp;
2017
2018         clear_counters(&average.threads, &average.cores, &average.packages);
2019
2020         for_all_cpus(sum_counters, t, c, p);
2021
2022         /* Use the global time delta for the average. */
2023         average.threads.tv_delta = tv_delta;
2024
2025         average.threads.tsc /= topo.num_cpus;
2026         average.threads.aperf /= topo.num_cpus;
2027         average.threads.mperf /= topo.num_cpus;
2028         average.threads.instr_count /= topo.num_cpus;
2029         average.threads.c1 /= topo.num_cpus;
2030
2031         if (average.threads.irq_count > 9999999)
2032                 sums_need_wide_columns = 1;
2033
2034         average.cores.c3 /= topo.num_cores;
2035         average.cores.c6 /= topo.num_cores;
2036         average.cores.c7 /= topo.num_cores;
2037         average.cores.mc6_us /= topo.num_cores;
2038
2039         if (DO_BIC(BIC_Totl_c0))
2040                 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
2041         if (DO_BIC(BIC_Any_c0))
2042                 average.packages.pkg_any_core_c0 /= topo.num_packages;
2043         if (DO_BIC(BIC_GFX_c0))
2044                 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
2045         if (DO_BIC(BIC_CPUGFX))
2046                 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
2047
2048         average.packages.pc2 /= topo.num_packages;
2049         if (DO_BIC(BIC_Pkgpc3))
2050                 average.packages.pc3 /= topo.num_packages;
2051         if (DO_BIC(BIC_Pkgpc6))
2052                 average.packages.pc6 /= topo.num_packages;
2053         if (DO_BIC(BIC_Pkgpc7))
2054                 average.packages.pc7 /= topo.num_packages;
2055
2056         average.packages.pc8 /= topo.num_packages;
2057         average.packages.pc9 /= topo.num_packages;
2058         average.packages.pc10 /= topo.num_packages;
2059
2060         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2061                 if (mp->format == FORMAT_RAW)
2062                         continue;
2063                 if (mp->type == COUNTER_ITEMS) {
2064                         if (average.threads.counter[i] > 9999999)
2065                                 sums_need_wide_columns = 1;
2066                         continue;
2067                 }
2068                 average.threads.counter[i] /= topo.num_cpus;
2069         }
2070         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2071                 if (mp->format == FORMAT_RAW)
2072                         continue;
2073                 if (mp->type == COUNTER_ITEMS) {
2074                         if (average.cores.counter[i] > 9999999)
2075                                 sums_need_wide_columns = 1;
2076                 }
2077                 average.cores.counter[i] /= topo.num_cores;
2078         }
2079         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2080                 if (mp->format == FORMAT_RAW)
2081                         continue;
2082                 if (mp->type == COUNTER_ITEMS) {
2083                         if (average.packages.counter[i] > 9999999)
2084                                 sums_need_wide_columns = 1;
2085                 }
2086                 average.packages.counter[i] /= topo.num_packages;
2087         }
2088 }
2089
/*
 * rdtsc()
 *
 * Execute the x86 RDTSC instruction and return the 64-bit value formed
 * from its two 32-bit outputs (EDX as the high half, EAX as the low half).
 *
 * The __asm__/__volatile__ spellings are used so the function also builds
 * under strict ISO modes (e.g. -std=c11), where plain "asm" is not a keyword.
 */
static unsigned long long rdtsc(void)
{
        unsigned int low, high;

        __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));

        return ((unsigned long long)high << 32) | low;
}
2098
/*
 * fopen_or_die()
 *
 * fopen() wrapper that never returns NULL: if the file cannot be opened,
 * the program exits with status 1 via err(), reporting the path.
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
        FILE *stream;

        stream = fopen(path, mode);
        if (stream == NULL)
                err(1, "%s: open failed", path);

        return stream;
}
2110
/*
 * snapshot_sysfs_counter()
 *
 * Open the file at "path", parse a single unsigned decimal integer from
 * it and return that value.
 *
 * Exits the program (via fopen_or_die() or err()) if the file cannot be
 * opened or does not start with a number.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
        FILE *fp;
        int retval;
        unsigned long long counter;

        fp = fopen_or_die(path, "r");

        /* %llu matches the unsigned long long destination; %lld did not */
        retval = fscanf(fp, "%llu", &counter);
        if (retval != 1)
                err(1, "snapshot_sysfs_counter(%s)", path);

        fclose(fp);

        return counter;
}
2132
2133 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
2134 {
2135         if (mp->msr_num != 0) {
2136                 if (get_msr(cpu, mp->msr_num, counterp))
2137                         return -1;
2138         } else {
2139                 char path[128 + PATH_BYTES];
2140
2141                 if (mp->flags & SYSFS_PERCPU) {
2142                         sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
2143
2144                         *counterp = snapshot_sysfs_counter(path);
2145                 } else {
2146                         *counterp = snapshot_sysfs_counter(mp->path);
2147                 }
2148         }
2149
2150         return 0;
2151 }
2152
/*
 * get_uncore_mhz()
 *
 * Read current_freq_khz for the given package/die from the
 * intel_uncore_frequency sysfs directory and return it divided by 1000.
 *
 * The directory name carries two-digit, zero-padded ids
 * ("package_NN_die_NN"), so the ids are formatted with %02d; a literal
 * '0' followed by %d would produce a three-digit name for ids >= 10.
 */
unsigned long long get_uncore_mhz(int package, int die)
{
        char path[128];

        sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
                die);

        return (snapshot_sysfs_counter(path) / 1000);
}
2162
2163 int get_epb(int cpu)
2164 {
2165         char path[128 + PATH_BYTES];
2166         unsigned long long msr;
2167         int ret, epb = -1;
2168         FILE *fp;
2169
2170         sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
2171
2172         fp = fopen(path, "r");
2173         if (!fp)
2174                 goto msr_fallback;
2175
2176         ret = fscanf(fp, "%d", &epb);
2177         if (ret != 1)
2178                 err(1, "%s(%s)", __func__, path);
2179
2180         fclose(fp);
2181
2182         return epb;
2183
2184 msr_fallback:
2185         get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
2186
2187         return msr & 0xf;
2188 }
2189
2190 void get_apic_id(struct thread_data *t)
2191 {
2192         unsigned int eax, ebx, ecx, edx;
2193
2194         if (DO_BIC(BIC_APIC)) {
2195                 eax = ebx = ecx = edx = 0;
2196                 __cpuid(1, eax, ebx, ecx, edx);
2197
2198                 t->apic_id = (ebx >> 24) & 0xff;
2199         }
2200
2201         if (!DO_BIC(BIC_X2APIC))
2202                 return;
2203
2204         if (authentic_amd || hygon_genuine) {
2205                 unsigned int topology_extensions;
2206
2207                 if (max_extended_level < 0x8000001e)
2208                         return;
2209
2210                 eax = ebx = ecx = edx = 0;
2211                 __cpuid(0x80000001, eax, ebx, ecx, edx);
2212                 topology_extensions = ecx & (1 << 22);
2213
2214                 if (topology_extensions == 0)
2215                         return;
2216
2217                 eax = ebx = ecx = edx = 0;
2218                 __cpuid(0x8000001e, eax, ebx, ecx, edx);
2219
2220                 t->x2apic_id = eax;
2221                 return;
2222         }
2223
2224         if (!genuine_intel)
2225                 return;
2226
2227         if (max_level < 0xb)
2228                 return;
2229
2230         ecx = 0;
2231         __cpuid(0xb, eax, ebx, ecx, edx);
2232         t->x2apic_id = edx;
2233
2234         if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
2235                 fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
2236 }
2237
/*
 * get_core_throt_cnt()
 *
 * Read the cpu's thermal_throttle/core_throttle_count sysfs attribute
 * and store it in *cnt.
 *
 * Returns 0 on success.  Returns -1, leaving *cnt untouched, if the file
 * cannot be opened or does not contain a number.
 */
int get_core_throt_cnt(int cpu, unsigned long long *cnt)
{
        /* the fixed part of the path is 65 bytes; 128 leaves ample room for the cpu number */
        char path[128];
        unsigned long long tmp;
        FILE *fp;
        int ret;

        sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
        fp = fopen(path, "r");
        if (!fp)
                return -1;

        /* %llu matches the unsigned long long destination; %lld did not */
        ret = fscanf(fp, "%llu", &tmp);
        fclose(fp);
        if (ret != 1)
                return -1;

        *cnt = tmp;

        return 0;
}
2257
2258 /*
2259  * get_counters(...)
2260  * migrate to cpu
2261  * acquire and record local counters for that cpu
2262  */
2263 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2264 {
2265         int cpu = t->cpu_id;
2266         unsigned long long msr;
2267         int aperf_mperf_retry_count = 0;
2268         struct msr_counter *mp;
2269         int i;
2270
2271         if (cpu_migrate(cpu)) {
2272                 fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
2273                 return -1;
2274         }
2275
2276         gettimeofday(&t->tv_begin, (struct timezone *)NULL);
2277
2278         if (first_counter_read)
2279                 get_apic_id(t);
2280 retry:
2281         t->tsc = rdtsc();       /* we are running on local CPU of interest */
2282
2283         if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
2284                 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
2285
2286                 /*
2287                  * The TSC, APERF and MPERF must be read together for
2288                  * APERF/MPERF and MPERF/TSC to give accurate results.
2289                  *
2290                  * Unfortunately, APERF and MPERF are read by
2291                  * individual system call, so delays may occur
2292                  * between them.  If the time to read them
2293                  * varies by a large amount, we re-read them.
2294                  */
2295
2296                 /*
2297                  * This initial dummy APERF read has been seen to
2298                  * reduce jitter in the subsequent reads.
2299                  */
2300
2301                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2302                         return -3;
2303
2304                 t->tsc = rdtsc();       /* re-read close to APERF */
2305
2306                 tsc_before = t->tsc;
2307
2308                 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
2309                         return -3;
2310
2311                 tsc_between = rdtsc();
2312
2313                 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
2314                         return -4;
2315
2316                 tsc_after = rdtsc();
2317
2318                 aperf_time = tsc_between - tsc_before;
2319                 mperf_time = tsc_after - tsc_between;
2320
2321                 /*
2322                  * If the system call latency to read APERF and MPERF
2323                  * differ by more than 2x, then try again.
2324                  */
2325                 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
2326                         aperf_mperf_retry_count++;
2327                         if (aperf_mperf_retry_count < 5)
2328                                 goto retry;
2329                         else
2330                                 warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
2331                 }
2332                 aperf_mperf_retry_count = 0;
2333
2334                 t->aperf = t->aperf * aperf_mperf_multiplier;
2335                 t->mperf = t->mperf * aperf_mperf_multiplier;
2336         }
2337
2338         if (DO_BIC(BIC_IPC))
2339                 if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
2340                         return -4;
2341
2342         if (DO_BIC(BIC_IRQ))
2343                 t->irq_count = irqs_per_cpu[cpu];
2344         if (DO_BIC(BIC_SMI)) {
2345                 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
2346                         return -5;
2347                 t->smi_count = msr & 0xFFFFFFFF;
2348         }
2349         if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
2350                 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
2351                         return -6;
2352         }
2353
2354         for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2355                 if (get_mp(cpu, mp, &t->counter[i]))
2356                         return -10;
2357         }
2358
2359         /* collect core counters only for 1st thread in core */
2360         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2361                 goto done;
2362
2363         if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
2364                 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
2365                         return -6;
2366         }
2367
2368         if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
2369                 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
2370                         return -7;
2371         } else if (do_knl_cstates && soft_c1_residency_display(BIC_CPU_c6)) {
2372                 if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
2373                         return -7;
2374         }
2375
2376         if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
2377                 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
2378                         return -8;
2379                 else if (t->is_atom) {
2380                         /*
2381                          * For Atom CPUs that has core cstate deeper than c6,
2382                          * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
2383                          * Minus CC7 (and deeper cstates) residency to get
2384                          * accturate cc6 residency.
2385                          */
2386                         c->c6 -= c->c7;
2387                 }
2388         }
2389
2390         if (DO_BIC(BIC_Mod_c6))
2391                 if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
2392                         return -8;
2393
2394         if (DO_BIC(BIC_CoreTmp)) {
2395                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
2396                         return -9;
2397                 c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
2398         }
2399
2400         if (DO_BIC(BIC_CORE_THROT_CNT))
2401                 get_core_throt_cnt(cpu, &c->core_throt_cnt);
2402
2403         if (do_rapl & RAPL_AMD_F17H) {
2404                 if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
2405                         return -14;
2406                 c->core_energy = msr & 0xFFFFFFFF;
2407         }
2408
2409         for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2410                 if (get_mp(cpu, mp, &c->counter[i]))
2411                         return -10;
2412         }
2413
2414         /* collect package counters only for 1st core in package */
2415         if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2416                 goto done;
2417
2418         if (DO_BIC(BIC_Totl_c0)) {
2419                 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
2420                         return -10;
2421         }
2422         if (DO_BIC(BIC_Any_c0)) {
2423                 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
2424                         return -11;
2425         }
2426         if (DO_BIC(BIC_GFX_c0)) {
2427                 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
2428                         return -12;
2429         }
2430         if (DO_BIC(BIC_CPUGFX)) {
2431                 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
2432                         return -13;
2433         }
2434         if (DO_BIC(BIC_Pkgpc3))
2435                 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
2436                         return -9;
2437         if (DO_BIC(BIC_Pkgpc6)) {
2438                 if (do_slm_cstates) {
2439                         if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
2440                                 return -10;
2441                 } else {
2442                         if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
2443                                 return -10;
2444                 }
2445         }
2446
2447         if (DO_BIC(BIC_Pkgpc2))
2448                 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
2449                         return -11;
2450         if (DO_BIC(BIC_Pkgpc7))
2451                 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
2452                         return -12;
2453         if (DO_BIC(BIC_Pkgpc8))
2454                 if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
2455                         return -13;
2456         if (DO_BIC(BIC_Pkgpc9))
2457                 if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
2458                         return -13;
2459         if (DO_BIC(BIC_Pkgpc10))
2460                 if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
2461                         return -13;
2462
2463         if (DO_BIC(BIC_CPU_LPI))
2464                 p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
2465         if (DO_BIC(BIC_SYS_LPI))
2466                 p->sys_lpi = cpuidle_cur_sys_lpi_us;
2467
2468         if (do_rapl & RAPL_PKG) {
2469                 if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
2470                         return -13;
2471                 p->energy_pkg = msr;
2472         }
2473         if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
2474                 if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
2475                         return -14;
2476                 p->energy_cores = msr;
2477         }
2478         if (do_rapl & RAPL_DRAM) {
2479                 if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
2480                         return -15;
2481                 p->energy_dram = msr;
2482         }
2483         if (do_rapl & RAPL_GFX) {
2484                 if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
2485                         return -16;
2486                 p->energy_gfx = msr;
2487         }
2488         if (do_rapl & RAPL_PKG_PERF_STATUS) {
2489                 if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
2490                         return -16;
2491                 p->rapl_pkg_perf_status = msr;
2492         }
2493         if (do_rapl & RAPL_DRAM_PERF_STATUS) {
2494                 if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
2495                         return -16;
2496                 p->rapl_dram_perf_status = msr;
2497         }
2498         if (do_rapl & RAPL_AMD_F17H) {
2499                 if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
2500                         return -13;
2501                 p->energy_pkg = msr;
2502         }
2503         if (DO_BIC(BIC_PkgTmp)) {
2504                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
2505                         return -17;
2506                 p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
2507         }
2508
2509         if (DO_BIC(BIC_GFX_rc6))
2510                 p->gfx_rc6_ms = gfx_cur_rc6_ms;
2511
2512         /* n.b. assume die0 uncore frequency applies to whole package */
2513         if (DO_BIC(BIC_UNCORE_MHZ))
2514                 p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
2515
2516         if (DO_BIC(BIC_GFXMHz))
2517                 p->gfx_mhz = gfx_cur_mhz;
2518
2519         if (DO_BIC(BIC_GFXACTMHz))
2520                 p->gfx_act_mhz = gfx_act_mhz;
2521
2522         for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2523                 if (get_mp(cpu, mp, &p->counter[i]))
2524                         return -10;
2525         }
2526 done:
2527         gettimeofday(&t->tv_end, (struct timezone *)NULL);
2528
2529         return 0;
2530 }
2531
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 */

#define PCLUKN 0                /* Unknown */
#define PCLRSV 1                /* Reserved */
#define PCL__0 2                /* PC0 */
#define PCL__1 3                /* PC1 */
#define PCL__2 4                /* PC2 */
#define PCL__3 5                /* PC3 */
#define PCL__4 6                /* PC4 */
#define PCL__6 7                /* PC6 */
#define PCL_6N 8                /* PC6 No Retention */
#define PCL_6R 9                /* PC6 Retention */
#define PCL__7 10               /* PC7 */
#define PCL_7S 11               /* PC7 Shrink */
#define PCL__8 12               /* PC8 */
#define PCL__9 13               /* PC9 */
#define PCL_10 14               /* PC10 */
#define PCLUNL 15               /* Unlimited */

int pkg_cstate_limit = PCLUKN;

/*
 * Printable name of each PCL* value, indexed by that value.
 * Designated initializers keep every string attached to its constant,
 * so PCLUKN always prints "unknown" and PCLRSV always prints "reserved".
 */
char *pkg_cstate_limit_strings[] = {
        [PCLUKN] = "unknown",
        [PCLRSV] = "reserved",
        [PCL__0] = "pc0",
        [PCL__1] = "pc1",
        [PCL__2] = "pc2",
        [PCL__3] = "pc3",
        [PCL__4] = "pc4",
        [PCL__6] = "pc6",
        [PCL_6N] = "pc6n",
        [PCL_6R] = "pc6r",
        [PCL__7] = "pc7",
        [PCL_7S] = "pc7s",
        [PCL__8] = "pc8",
        [PCL__9] = "pc9",
        [PCL_10] = "pc10",
        [PCLUNL] = "unlimited"
};
2559
/*
 * Package C-state limit decode tables.
 *
 * Each table translates a 4-bit encoding (0..15) into one of the PCL*
 * constants.  NOTE(review): the encoding is presumably the low nibble
 * of MSR_PKG_CST_CONFIG_CONTROL and the name prefix the CPU generation
 * the table applies to -- the code that selects and indexes the tables
 * is outside this section, confirm there.
 */

int nhm_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__1, PCL__3, PCL__6,
        /* 0x4 - 0x7 */ PCL__7, PCLRSV, PCLRSV, PCLUNL,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int snb_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__2, PCL_6N, PCL_6R,
        /* 0x4 - 0x7 */ PCL__7, PCL_7S, PCLRSV, PCLUNL,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int hsw_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__2, PCL__3, PCL__6,
        /* 0x4 - 0x7 */ PCL__7, PCL_7S, PCL__8, PCL__9,
        /* 0x8 - 0xb */ PCLUNL, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int slv_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__1, PCLRSV, PCLRSV,
        /* 0x4 - 0x7 */ PCL__4, PCLRSV, PCL__6, PCL__7,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCL__6, PCL__7
};

int amt_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCLUNL, PCL__1, PCL__2, PCLRSV,
        /* 0x4 - 0x7 */ PCLRSV, PCLRSV, PCL__6, PCL__7,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int phi_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__2, PCL_6N, PCL_6R,
        /* 0x4 - 0x7 */ PCLRSV, PCLRSV, PCLRSV, PCLUNL,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int glm_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCLUNL, PCL__1, PCL__3, PCL__6,
        /* 0x4 - 0x7 */ PCL__7, PCL_7S, PCL__8, PCL__9,
        /* 0x8 - 0xb */ PCL_10, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int skx_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__2, PCL_6N, PCL_6R,
        /* 0x4 - 0x7 */ PCLRSV, PCLRSV, PCLRSV, PCLUNL,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};

int icx_pkg_cstate_limits[16] = {
        /* 0x0 - 0x3 */ PCL__0, PCL__2, PCL__6, PCL__6,
        /* 0x4 - 0x7 */ PCLRSV, PCLRSV, PCLRSV, PCLUNL,
        /* 0x8 - 0xb */ PCLRSV, PCLRSV, PCLRSV, PCLRSV,
        /* 0xc - 0xf */ PCLRSV, PCLRSV, PCLRSV, PCLRSV
};
2604
2605 static void calculate_tsc_tweak()
2606 {
2607         tsc_tweak = base_hz / tsc_hz;
2608 }
2609
2610 void prewake_cstate_probe(unsigned int family, unsigned int model);
2611
2612 static void dump_nhm_platform_info(void)
2613 {
2614         unsigned long long msr;
2615         unsigned int ratio;
2616
2617         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
2618
2619         fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
2620
2621         ratio = (msr >> 40) & 0xFF;
2622         fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
2623
2624         ratio = (msr >> 8) & 0xFF;
2625         fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2626
2627         get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
2628         fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
2629                 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
2630
2631         /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
2632         if (dis_cstate_prewake)
2633                 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
2634
2635         return;
2636 }
2637
2638 static void dump_hsw_turbo_ratio_limits(void)
2639 {
2640         unsigned long long msr;
2641         unsigned int ratio;
2642
2643         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
2644
2645         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
2646
2647         ratio = (msr >> 8) & 0xFF;
2648         if (ratio)
2649                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
2650
2651         ratio = (msr >> 0) & 0xFF;
2652         if (ratio)
2653                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
2654         return;
2655 }
2656
2657 static void dump_ivt_turbo_ratio_limits(void)
2658 {
2659         unsigned long long msr;
2660         unsigned int ratio;
2661
2662         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
2663
2664         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
2665
2666         ratio = (msr >> 56) & 0xFF;
2667         if (ratio)
2668                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
2669
2670         ratio = (msr >> 48) & 0xFF;
2671         if (ratio)
2672                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
2673
2674         ratio = (msr >> 40) & 0xFF;
2675         if (ratio)
2676                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
2677
2678         ratio = (msr >> 32) & 0xFF;
2679         if (ratio)
2680                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
2681
2682         ratio = (msr >> 24) & 0xFF;
2683         if (ratio)
2684                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
2685
2686         ratio = (msr >> 16) & 0xFF;
2687         if (ratio)
2688                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
2689
2690         ratio = (msr >> 8) & 0xFF;
2691         if (ratio)
2692                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
2693
2694         ratio = (msr >> 0) & 0xFF;
2695         if (ratio)
2696                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
2697         return;
2698 }
2699
2700 int has_turbo_ratio_group_limits(int family, int model)
2701 {
2702
2703         if (!genuine_intel)
2704                 return 0;
2705
2706         if (family != 6)
2707                 return 0;
2708
2709         switch (model) {
2710         case INTEL_FAM6_ATOM_GOLDMONT:
2711         case INTEL_FAM6_SKYLAKE_X:
2712         case INTEL_FAM6_ICELAKE_X:
2713         case INTEL_FAM6_SAPPHIRERAPIDS_X:
2714         case INTEL_FAM6_ATOM_GOLDMONT_D:
2715         case INTEL_FAM6_ATOM_TREMONT_D:
2716                 return 1;
2717         default:
2718                 return 0;
2719         }
2720 }
2721
2722 static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
2723 {
2724         unsigned long long msr, core_counts;
2725         int shift;
2726
2727         get_msr(base_cpu, trl_msr_offset, &msr);
2728         fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
2729                 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
2730
2731         if (has_turbo_ratio_group_limits(family, model)) {
2732                 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
2733                 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
2734         } else {
2735                 core_counts = 0x0807060504030201;
2736         }
2737
2738         for (shift = 56; shift >= 0; shift -= 8) {
2739                 unsigned int ratio, group_size;
2740
2741                 ratio = (msr >> shift) & 0xFF;
2742                 group_size = (core_counts >> shift) & 0xFF;
2743                 if (ratio)
2744                         fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2745                                 ratio, bclk, ratio * bclk, group_size);
2746         }
2747
2748         return;
2749 }
2750
2751 static void dump_atom_turbo_ratio_limits(void)
2752 {
2753         unsigned long long msr;
2754         unsigned int ratio;
2755
2756         get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
2757         fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2758
2759         ratio = (msr >> 0) & 0x3F;
2760         if (ratio)
2761                 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
2762
2763         ratio = (msr >> 8) & 0x3F;
2764         if (ratio)
2765                 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
2766
2767         ratio = (msr >> 16) & 0x3F;
2768         if (ratio)
2769                 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
2770
2771         get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
2772         fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
2773
2774         ratio = (msr >> 24) & 0x3F;
2775         if (ratio)
2776                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
2777
2778         ratio = (msr >> 16) & 0x3F;
2779         if (ratio)
2780                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
2781
2782         ratio = (msr >> 8) & 0x3F;
2783         if (ratio)
2784                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
2785
2786         ratio = (msr >> 0) & 0x3F;
2787         if (ratio)
2788                 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
2789 }
2790
2791 static void dump_knl_turbo_ratio_limits(void)
2792 {
2793         const unsigned int buckets_no = 7;
2794
2795         unsigned long long msr;
2796         int delta_cores, delta_ratio;
2797         int i, b_nr;
2798         unsigned int cores[buckets_no];
2799         unsigned int ratio[buckets_no];
2800
2801         get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
2802
2803         fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
2804
2805         /*
2806          * Turbo encoding in KNL is as follows:
2807          * [0] -- Reserved
2808          * [7:1] -- Base value of number of active cores of bucket 1.
2809          * [15:8] -- Base value of freq ratio of bucket 1.
2810          * [20:16] -- +ve delta of number of active cores of bucket 2.
2811          * i.e. active cores of bucket 2 =
2812          * active cores of bucket 1 + delta
2813          * [23:21] -- Negative delta of freq ratio of bucket 2.
2814          * i.e. freq ratio of bucket 2 =
2815          * freq ratio of bucket 1 - delta
2816          * [28:24]-- +ve delta of number of active cores of bucket 3.
2817          * [31:29]-- -ve delta of freq ratio of bucket 3.
2818          * [36:32]-- +ve delta of number of active cores of bucket 4.
2819          * [39:37]-- -ve delta of freq ratio of bucket 4.
2820          * [44:40]-- +ve delta of number of active cores of bucket 5.
2821          * [47:45]-- -ve delta of freq ratio of bucket 5.
2822          * [52:48]-- +ve delta of number of active cores of bucket 6.
2823          * [55:53]-- -ve delta of freq ratio of bucket 6.
2824          * [60:56]-- +ve delta of number of active cores of bucket 7.
2825          * [63:61]-- -ve delta of freq ratio of bucket 7.
2826          */
2827
2828         b_nr = 0;
2829         cores[b_nr] = (msr & 0xFF) >> 1;
2830         ratio[b_nr] = (msr >> 8) & 0xFF;
2831
2832         for (i = 16; i < 64; i += 8) {
2833                 delta_cores = (msr >> i) & 0x1F;
2834                 delta_ratio = (msr >> (i + 5)) & 0x7;
2835
2836                 cores[b_nr + 1] = cores[b_nr] + delta_cores;
2837                 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
2838                 b_nr++;
2839         }
2840
2841         for (i = buckets_no - 1; i >= 0; i--)
2842                 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
2843                         fprintf(outf,
2844                                 "%d * %.1f = %.1f MHz max turbo %d active cores\n",
2845                                 ratio[i], bclk, ratio[i] * bclk, cores[i]);
2846 }
2847
2848 static void dump_nhm_cst_cfg(void)
2849 {
2850         unsigned long long msr;
2851
2852         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
2853
2854         fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
2855
2856         fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
2857                 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
2858                 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
2859                 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
2860                 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
2861                 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
2862
2863 #define AUTOMATIC_CSTATE_CONVERSION             (1UL << 16)
2864         if (has_automatic_cstate_conversion) {
2865                 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
2866         }
2867
2868         fprintf(outf, ")\n");
2869
2870         return;
2871 }
2872
2873 static void dump_config_tdp(void)
2874 {
2875         unsigned long long msr;
2876
2877         get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
2878         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
2879         fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
2880
2881         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
2882         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
2883         if (msr) {
2884                 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2885                 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2886                 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2887                 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
2888         }
2889         fprintf(outf, ")\n");
2890
2891         get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
2892         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
2893         if (msr) {
2894                 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
2895                 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
2896                 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
2897                 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
2898         }
2899         fprintf(outf, ")\n");
2900
2901         get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
2902         fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
2903         if ((msr) & 0x3)
2904                 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
2905         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2906         fprintf(outf, ")\n");
2907
2908         get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
2909         fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
2910         fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
2911         fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
2912         fprintf(outf, ")\n");
2913 }
2914
2915 unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
2916
2917 void print_irtl(void)
2918 {
2919         unsigned long long msr;
2920
2921         get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
2922         fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
2923         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2924                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2925
2926         get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
2927         fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
2928         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2929                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2930
2931         get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
2932         fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
2933         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2934                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2935
2936         if (!do_irtl_hsw)
2937                 return;
2938
2939         get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
2940         fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
2941         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2942                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2943
2944         get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
2945         fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
2946         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2947                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2948
2949         get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
2950         fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
2951         fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
2952                 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
2953
2954 }
2955
2956 void free_fd_percpu(void)
2957 {
2958         int i;
2959
2960         for (i = 0; i < topo.max_cpu_num + 1; ++i) {
2961                 if (fd_percpu[i] != 0)
2962                         close(fd_percpu[i]);
2963         }
2964
2965         free(fd_percpu);
2966 }
2967
2968 void free_all_buffers(void)
2969 {
2970         int i;
2971
2972         CPU_FREE(cpu_present_set);
2973         cpu_present_set = NULL;
2974         cpu_present_setsize = 0;
2975
2976         CPU_FREE(cpu_affinity_set);
2977         cpu_affinity_set = NULL;
2978         cpu_affinity_setsize = 0;
2979
2980         free(thread_even);
2981         free(core_even);
2982         free(package_even);
2983
2984         thread_even = NULL;
2985         core_even = NULL;
2986         package_even = NULL;
2987
2988         free(thread_odd);
2989         free(core_odd);
2990         free(package_odd);
2991
2992         thread_odd = NULL;
2993         core_odd = NULL;
2994         package_odd = NULL;
2995
2996         free(output_buffer);
2997         output_buffer = NULL;
2998         outp = NULL;
2999
3000         free_fd_percpu();
3001
3002         free(irq_column_2_cpu);
3003         free(irqs_per_cpu);
3004
3005         for (i = 0; i <= topo.max_cpu_num; ++i) {
3006                 if (cpus[i].put_ids)
3007                         CPU_FREE(cpus[i].put_ids);
3008         }
3009         free(cpus);
3010 }
3011
/*
 * parse_int_file(fmt, ...)
 * Build a path from the printf-style arguments and read one int from
 * the start of that file.
 *
 * Returns the parsed value, or 0 when the file cannot be opened.
 * Exits via err() when the file opens but holds no parsable number.
 */
int parse_int_file(const char *fmt, ...)
{
        char path[PATH_MAX];
        va_list ap;
        FILE *fp;
        int parsed;

        va_start(ap, fmt);
        vsnprintf(path, sizeof(path), fmt, ap);
        va_end(ap);

        fp = fopen(path, "r");
        if (fp == NULL)
                return 0;

        if (fscanf(fp, "%d", &parsed) != 1)
                err(1, "%s: failed to parse number from file", path);

        fclose(fp);

        return parsed;
}
3035
/*
 * cpu_is_first_core_in_package(cpu)
 * Return 1 if @cpu equals the first number found in its
 * topology/core_siblings_list sysfs attribute, 0 otherwise.
 */
int cpu_is_first_core_in_package(int cpu)
{
        int first_sibling;

        first_sibling = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);

        return first_sibling == cpu;
}
3044
/*
 * get_physical_package_id(cpu)
 * Return the value of @cpu's topology/physical_package_id sysfs
 * attribute, as parsed by parse_int_file().
 */
int get_physical_package_id(int cpu)
{
        int package_id;

        package_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);

        return package_id;
}
3049
/*
 * get_die_id(cpu)
 * Return the value of @cpu's topology/die_id sysfs attribute, as
 * parsed by parse_int_file().
 */
int get_die_id(int cpu)
{
        int die_id;

        die_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);

        return die_id;
}
3054
/*
 * get_core_id(cpu)
 * Return the value of @cpu's topology/core_id sysfs attribute, as
 * parsed by parse_int_file().
 */
int get_core_id(int cpu)
{
        int core_id;

        core_id = parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);

        return core_id;
}
3059
3060 void set_node_data(void)
3061 {
3062         int pkg, node, lnode, cpu, cpux;
3063         int cpu_count;
3064
3065         /* initialize logical_node_id */
3066         for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
3067                 cpus[cpu].logical_node_id = -1;
3068
3069         cpu_count = 0;
3070         for (pkg = 0; pkg < topo.num_packages; pkg++) {
3071                 lnode = 0;
3072                 for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
3073                         if (cpus[cpu].physical_package_id != pkg)
3074                                 continue;
3075                         /* find a cpu with an unset logical_node_id */
3076                         if (cpus[cpu].logical_node_id != -1)
3077                                 continue;
3078                         cpus[cpu].logical_node_id = lnode;
3079                         node = cpus[cpu].physical_node_id;
3080                         cpu_count++;
3081                         /*
3082                          * find all matching cpus on this pkg and set
3083                          * the logical_node_id
3084                          */
3085                         for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
3086                                 if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
3087                                         cpus[cpux].logical_node_id = lnode;
3088                                         cpu_count++;
3089                                 }
3090                         }
3091                         lnode++;
3092                         if (lnode > topo.nodes_per_pkg)
3093                                 topo.nodes_per_pkg = lnode;
3094                 }
3095                 if (cpu_count >= topo.max_cpu_num)
3096                         break;
3097         }
3098 }
3099
3100 int get_physical_node_id(struct cpu_topology *thiscpu)
3101 {
3102         char path[80];
3103         FILE *filep;
3104         int i;
3105         int cpu = thiscpu->logical_cpu_id;
3106
3107         for (i = 0; i <= topo.max_cpu_num; i++) {
3108                 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
3109                 filep = fopen(path, "r");
3110                 if (!filep)
3111                         continue;
3112                 fclose(filep);
3113                 return i;
3114         }
3115         return -1;
3116 }
3117
3118 int get_thread_siblings(struct cpu_topology *thiscpu)
3119 {
3120         char path[80], character;
3121         FILE *filep;
3122         unsigned long map;
3123         int so, shift, sib_core;
3124         int cpu = thiscpu->logical_cpu_id;
3125         int offset = topo.max_cpu_num + 1;
3126         size_t size;
3127         int thread_id = 0;
3128
3129         thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
3130         if (thiscpu->thread_id < 0)
3131                 thiscpu->thread_id = thread_id++;
3132         if (!thiscpu->put_ids)
3133                 return -1;
3134
3135         size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
3136         CPU_ZERO_S(size, thiscpu->put_ids);
3137
3138         sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
3139         filep = fopen(path, "r");
3140
3141         if (!filep) {
3142                 warnx("%s: open failed", path);
3143                 return -1;
3144         }
3145         do {
3146                 offset -= BITMASK_SIZE;
3147                 if (fscanf(filep, "%lx%c", &map, &character) != 2)
3148                         err(1, "%s: failed to parse file", path);
3149                 for (shift = 0; shift < BITMASK_SIZE; shift++) {
3150                         if ((map >> shift) & 0x1) {
3151                                 so = shift + offset;
3152                                 sib_core = get_core_id(so);
3153                                 if (sib_core == thiscpu->physical_core_id) {
3154                                         CPU_SET_S(so, size, thiscpu->put_ids);
3155                                         if ((so != cpu) && (cpus[so].thread_id < 0))
3156                                                 cpus[so].thread_id = thread_id++;
3157                                 }
3158                         }
3159                 }
3160         } while (character == ',');
3161         fclose(filep);
3162
3163         return CPU_COUNT_S(size, thiscpu->put_ids);
3164 }
3165
3166 /*
3167  * run func(thread, core, package) in topology order
3168  * skip non-present cpus
3169  */
3170
3171 int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
3172                                struct pkg_data *, struct thread_data *, struct core_data *,
3173                                struct pkg_data *), struct thread_data *thread_base,
3174                    struct core_data *core_base, struct pkg_data *pkg_base,
3175                    struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
3176 {
3177         int retval, pkg_no, node_no, core_no, thread_no;
3178
3179         for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
3180                 for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
3181                         for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
3182                                 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
3183                                         struct thread_data *t, *t2;
3184                                         struct core_data *c, *c2;
3185                                         struct pkg_data *p, *p2;
3186
3187                                         t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
3188
3189                                         if (cpu_is_not_present(t->cpu_id))
3190                                                 continue;
3191
3192                                         t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
3193
3194                                         c = GET_CORE(core_base, core_no, node_no, pkg_no);
3195                                         c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
3196
3197                                         p = GET_PKG(pkg_base, pkg_no);
3198                                         p2 = GET_PKG(pkg_base2, pkg_no);
3199
3200                                         retval = func(t, c, p, t2, c2, p2);
3201                                         if (retval)
3202                                                 return retval;
3203                                 }
3204                         }
3205                 }
3206         }
3207         return 0;
3208 }
3209
3210 /*
3211  * run func(cpu) on every cpu in /proc/stat
3212  * return max_cpu number
3213  */
3214 int for_all_proc_cpus(int (func) (int))
3215 {
3216         FILE *fp;
3217         int cpu_num;
3218         int retval;
3219
3220         fp = fopen_or_die(proc_stat, "r");
3221
3222         retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
3223         if (retval != 0)
3224                 err(1, "%s: failed to parse format", proc_stat);
3225
3226         while (1) {
3227                 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
3228                 if (retval != 1)
3229                         break;
3230
3231                 retval = func(cpu_num);
3232                 if (retval) {
3233                         fclose(fp);
3234                         return (retval);
3235                 }
3236         }
3237         fclose(fp);
3238         return 0;
3239 }
3240
3241 void re_initialize(void)
3242 {
3243         free_all_buffers();
3244         setup_all_buffers();
3245         fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
3246 }
3247
3248 void set_max_cpu_num(void)
3249 {
3250         FILE *filep;
3251         int base_cpu;
3252         unsigned long dummy;
3253         char pathname[64];
3254
3255         base_cpu = sched_getcpu();
3256         if (base_cpu < 0)
3257                 err(1, "cannot find calling cpu ID");
3258         sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
3259
3260         filep = fopen_or_die(pathname, "r");
3261         topo.max_cpu_num = 0;
3262         while (fscanf(filep, "%lx,", &dummy) == 1)
3263                 topo.max_cpu_num += BITMASK_SIZE;
3264         fclose(filep);
3265         topo.max_cpu_num--;     /* 0 based */
3266 }
3267
3268 /*
3269  * count_cpus()
3270  * remember the last one seen, it will be the max
3271  */
3272 int count_cpus(int cpu)
3273 {
3274         UNUSED(cpu);
3275
3276         topo.num_cpus++;
3277         return 0;
3278 }
3279
3280 int mark_cpu_present(int cpu)
3281 {
3282         CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
3283         return 0;
3284 }
3285
3286 int init_thread_id(int cpu)
3287 {
3288         cpus[cpu].thread_id = -1;
3289         return 0;
3290 }
3291
3292 /*
3293  * snapshot_proc_interrupts()
3294  *
3295  * read and record summary of /proc/interrupts
3296  *
3297  * return 1 if config change requires a restart, else return 0
3298  */
3299 int snapshot_proc_interrupts(void)
3300 {
3301         static FILE *fp;
3302         int column, retval;
3303
3304         if (fp == NULL)
3305                 fp = fopen_or_die("/proc/interrupts", "r");
3306         else
3307                 rewind(fp);
3308
3309         /* read 1st line of /proc/interrupts to get cpu* name for each column */
3310         for (column = 0; column < topo.num_cpus; ++column) {
3311                 int cpu_number;
3312
3313                 retval = fscanf(fp, " CPU%d", &cpu_number);
3314                 if (retval != 1)
3315                         break;
3316
3317                 if (cpu_number > topo.max_cpu_num) {
3318                         warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
3319                         return 1;
3320                 }
3321
3322                 irq_column_2_cpu[column] = cpu_number;
3323                 irqs_per_cpu[cpu_number] = 0;
3324         }
3325
3326         /* read /proc/interrupt count lines and sum up irqs per cpu */
3327         while (1) {
3328                 int column;
3329                 char buf[64];
3330
3331                 retval = fscanf(fp, " %s:", buf);       /* flush irq# "N:" */
3332                 if (retval != 1)
3333                         break;
3334
3335                 /* read the count per cpu */
3336                 for (column = 0; column < topo.num_cpus; ++column) {
3337
3338                         int cpu_number, irq_count;
3339
3340                         retval = fscanf(fp, " %d", &irq_count);
3341                         if (retval != 1)
3342                                 break;
3343
3344                         cpu_number = irq_column_2_cpu[column];
3345                         irqs_per_cpu[cpu_number] += irq_count;
3346
3347                 }
3348
3349                 while (getc(fp) != '\n') ;      /* flush interrupt description */
3350
3351         }
3352         return 0;
3353 }
3354
3355 /*
3356  * snapshot_gfx_rc6_ms()
3357  *
3358  * record snapshot of
3359  * /sys/class/drm/card0/power/rc6_residency_ms
3360  *
3361  * return 1 if config change requires a restart, else return 0
3362  */
3363 int snapshot_gfx_rc6_ms(void)
3364 {
3365         FILE *fp;
3366         int retval;
3367
3368         fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
3369
3370         retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
3371         if (retval != 1)
3372                 err(1, "GFX rc6");
3373
3374         fclose(fp);
3375
3376         return 0;
3377 }
3378
3379 /*
3380  * snapshot_gfx_mhz()
3381  *
3382  * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
3383  * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
3384  *
3385  * return 1 if config change requires a restart, else return 0
3386  */
3387 int snapshot_gfx_mhz(void)
3388 {
3389         static FILE *fp;
3390         int retval;
3391
3392         if (fp == NULL) {
3393                 fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
3394                 if (!fp)
3395                         fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
3396         } else {
3397                 rewind(fp);
3398                 fflush(fp);
3399         }
3400
3401         retval = fscanf(fp, "%d", &gfx_cur_mhz);
3402         if (retval != 1)
3403                 err(1, "GFX MHz");
3404
3405         return 0;
3406 }
3407
3408 /*
3409  * snapshot_gfx_cur_mhz()
3410  *
3411  * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
3412  * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
3413  *
3414  * return 1 if config change requires a restart, else return 0
3415  */
3416 int snapshot_gfx_act_mhz(void)
3417 {
3418         static FILE *fp;
3419         int retval;
3420
3421         if (fp == NULL) {
3422                 fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
3423                 if (!fp)
3424                         fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
3425         } else {
3426                 rewind(fp);
3427                 fflush(fp);
3428         }
3429
3430         retval = fscanf(fp, "%d", &gfx_act_mhz);
3431         if (retval != 1)
3432                 err(1, "GFX ACT MHz");
3433
3434         return 0;
3435 }
3436
3437 /*
3438  * snapshot_cpu_lpi()
3439  *
3440  * record snapshot of
3441  * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
3442  */
3443 int snapshot_cpu_lpi_us(void)
3444 {
3445         FILE *fp;
3446         int retval;
3447
3448         fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
3449
3450         retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
3451         if (retval != 1) {
3452                 fprintf(stderr, "Disabling Low Power Idle CPU output\n");
3453                 BIC_NOT_PRESENT(BIC_CPU_LPI);
3454                 fclose(fp);
3455                 return -1;
3456         }
3457
3458         fclose(fp);
3459
3460         return 0;
3461 }
3462
3463 /*
3464  * snapshot_sys_lpi()
3465  *
3466  * record snapshot of sys_lpi_file
3467  */
3468 int snapshot_sys_lpi_us(void)
3469 {
3470         FILE *fp;
3471         int retval;
3472
3473         fp = fopen_or_die(sys_lpi_file, "r");
3474
3475         retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
3476         if (retval != 1) {
3477                 fprintf(stderr, "Disabling Low Power Idle System output\n");
3478                 BIC_NOT_PRESENT(BIC_SYS_LPI);
3479                 fclose(fp);
3480                 return -1;
3481         }
3482         fclose(fp);
3483
3484         return 0;
3485 }
3486
3487 /*
3488  * snapshot /proc and /sys files
3489  *
3490  * return 1 if configuration restart needed, else return 0
3491  */
3492 int snapshot_proc_sysfs_files(void)
3493 {
3494         if (DO_BIC(BIC_IRQ))
3495                 if (snapshot_proc_interrupts())
3496                         return 1;
3497
3498         if (DO_BIC(BIC_GFX_rc6))
3499                 snapshot_gfx_rc6_ms();
3500
3501         if (DO_BIC(BIC_GFXMHz))
3502                 snapshot_gfx_mhz();
3503
3504         if (DO_BIC(BIC_GFXACTMHz))
3505                 snapshot_gfx_act_mhz();
3506
3507         if (DO_BIC(BIC_CPU_LPI))
3508                 snapshot_cpu_lpi_us();
3509
3510         if (DO_BIC(BIC_SYS_LPI))
3511                 snapshot_sys_lpi_us();
3512
3513         return 0;
3514 }
3515
3516 int exit_requested;
3517
3518 static void signal_handler(int signal)
3519 {
3520         switch (signal) {
3521         case SIGINT:
3522                 exit_requested = 1;
3523                 if (debug)
3524                         fprintf(stderr, " SIGINT\n");
3525                 break;
3526         case SIGUSR1:
3527                 if (debug > 1)
3528                         fprintf(stderr, "SIGUSR1\n");
3529                 break;
3530         }
3531 }
3532
3533 void setup_signal_handler(void)
3534 {
3535         struct sigaction sa;
3536
3537         memset(&sa, 0, sizeof(sa));
3538
3539         sa.sa_handler = &signal_handler;
3540
3541         if (sigaction(SIGINT, &sa, NULL) < 0)
3542                 err(1, "sigaction SIGINT");
3543         if (sigaction(SIGUSR1, &sa, NULL) < 0)
3544                 err(1, "sigaction SIGUSR1");
3545 }
3546
3547 void do_sleep(void)
3548 {
3549         struct timeval tout;
3550         struct timespec rest;
3551         fd_set readfds;
3552         int retval;
3553
3554         FD_ZERO(&readfds);
3555         FD_SET(0, &readfds);
3556
3557         if (ignore_stdin) {
3558                 nanosleep(&interval_ts, NULL);
3559                 return;
3560         }
3561
3562         tout = interval_tv;
3563         retval = select(1, &readfds, NULL, NULL, &tout);
3564
3565         if (retval == 1) {
3566                 switch (getc(stdin)) {
3567                 case 'q':
3568                         exit_requested = 1;
3569                         break;
3570                 case EOF:
3571                         /*
3572                          * 'stdin' is a pipe closed on the other end. There
3573                          * won't be any further input.
3574                          */
3575                         ignore_stdin = 1;
3576                         /* Sleep the rest of the time */
3577                         rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
3578                         rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
3579                         nanosleep(&rest, NULL);
3580                 }
3581         }
3582 }
3583
3584 int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
3585 {
3586         int ret, idx;
3587         unsigned long long msr_cur, msr_last;
3588
3589         if (!per_cpu_msr_sum)
3590                 return 1;
3591
3592         idx = offset_to_idx(offset);
3593         if (idx < 0)
3594                 return idx;
3595         /* get_msr_sum() = sum + (get_msr() - last) */
3596         ret = get_msr(cpu, offset, &msr_cur);
3597         if (ret)
3598                 return ret;
3599         msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
3600         DELTA_WRAP32(msr_cur, msr_last);
3601         *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
3602
3603         return 0;
3604 }
3605
3606 timer_t timerid;
3607
3608 /* Timer callback, update the sum of MSRs periodically. */
3609 static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3610 {
3611         int i, ret;
3612         int cpu = t->cpu_id;
3613
3614         UNUSED(c);
3615         UNUSED(p);
3616
3617         for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
3618                 unsigned long long msr_cur, msr_last;
3619                 off_t offset;
3620
3621                 if (!idx_valid(i))
3622                         continue;
3623                 offset = idx_to_offset(i);
3624                 if (offset < 0)
3625                         continue;
3626                 ret = get_msr(cpu, offset, &msr_cur);
3627                 if (ret) {
3628                         fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
3629                         continue;
3630                 }
3631
3632                 msr_last = per_cpu_msr_sum[cpu].entries[i].last;
3633                 per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
3634
3635                 DELTA_WRAP32(msr_cur, msr_last);
3636                 per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
3637         }
3638         return 0;
3639 }
3640
3641 static void msr_record_handler(union sigval v)
3642 {
3643         UNUSED(v);
3644
3645         for_all_cpus(update_msr_sum, EVEN_COUNTERS);
3646 }
3647
3648 void msr_sum_record(void)
3649 {
3650         struct itimerspec its;
3651         struct sigevent sev;
3652
3653         per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
3654         if (!per_cpu_msr_sum) {
3655                 fprintf(outf, "Can not allocate memory for long time MSR.\n");
3656                 return;
3657         }
3658         /*
3659          * Signal handler might be restricted, so use thread notifier instead.
3660          */
3661         memset(&sev, 0, sizeof(struct sigevent));
3662         sev.sigev_notify = SIGEV_THREAD;
3663         sev.sigev_notify_function = msr_record_handler;
3664
3665         sev.sigev_value.sival_ptr = &timerid;
3666         if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
3667                 fprintf(outf, "Can not create timer.\n");
3668                 goto release_msr;
3669         }
3670
3671         its.it_value.tv_sec = 0;
3672         its.it_value.tv_nsec = 1;
3673         /*
3674          * A wraparound time has been calculated early.
3675          * Some sources state that the peak power for a
3676          * microprocessor is usually 1.5 times the TDP rating,
3677          * use 2 * TDP for safety.
3678          */
3679         its.it_interval.tv_sec = rapl_joule_counter_range / 2;
3680         its.it_interval.tv_nsec = 0;
3681
3682         if (timer_settime(timerid, 0, &its, NULL) == -1) {
3683                 fprintf(outf, "Can not set timer.\n");
3684                 goto release_timer;
3685         }
3686         return;
3687
3688 release_timer:
3689         timer_delete(timerid);
3690 release_msr:
3691         free(per_cpu_msr_sum);
3692 }
3693
3694 /*
3695  * set_my_sched_priority(pri)
3696  * return previous
3697  */
3698 int set_my_sched_priority(int priority)
3699 {
3700         int retval;
3701         int original_priority;
3702
3703         errno = 0;
3704         original_priority = getpriority(PRIO_PROCESS, 0);
3705         if (errno && (original_priority == -1))
3706                 err(errno, "getpriority");
3707
3708         retval = setpriority(PRIO_PROCESS, 0, priority);
3709         if (retval)
3710                 errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
3711
3712         errno = 0;
3713         retval = getpriority(PRIO_PROCESS, 0);
3714         if (retval != priority)
3715                 err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
3716
3717         return original_priority;
3718 }
3719
3720 void turbostat_loop()
3721 {
3722         int retval;
3723         int restarted = 0;
3724         unsigned int done_iters = 0;
3725
3726         setup_signal_handler();
3727
3728         /*
3729          * elevate own priority for interval mode
3730          */
3731         set_my_sched_priority(-20);
3732
3733 restart:
3734         restarted++;
3735
3736         snapshot_proc_sysfs_files();
3737         retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3738         first_counter_read = 0;
3739         if (retval < -1) {
3740                 exit(retval);
3741         } else if (retval == -1) {
3742                 if (restarted > 10) {
3743                         exit(retval);
3744                 }
3745                 re_initialize();
3746                 goto restart;
3747         }
3748         restarted = 0;
3749         done_iters = 0;
3750         gettimeofday(&tv_even, (struct timezone *)NULL);
3751
3752         while (1) {
3753                 if (for_all_proc_cpus(cpu_is_not_present)) {
3754                         re_initialize();
3755                         goto restart;
3756                 }
3757                 do_sleep();
3758                 if (snapshot_proc_sysfs_files())
3759                         goto restart;
3760                 retval = for_all_cpus(get_counters, ODD_COUNTERS);
3761                 if (retval < -1) {
3762                         exit(retval);
3763                 } else if (retval == -1) {
3764                         re_initialize();
3765                         goto restart;
3766                 }
3767                 gettimeofday(&tv_odd, (struct timezone *)NULL);
3768                 timersub(&tv_odd, &tv_even, &tv_delta);
3769                 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
3770                         re_initialize();
3771                         goto restart;
3772                 }
3773                 compute_average(EVEN_COUNTERS);
3774                 format_all_counters(EVEN_COUNTERS);
3775                 flush_output_stdout();
3776                 if (exit_requested)
3777                         break;
3778                 if (num_iterations && ++done_iters >= num_iterations)
3779                         break;
3780                 do_sleep();
3781                 if (snapshot_proc_sysfs_files())
3782                         goto restart;
3783                 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
3784                 if (retval < -1) {
3785                         exit(retval);
3786                 } else if (retval == -1) {
3787                         re_initialize();
3788                         goto restart;
3789                 }
3790                 gettimeofday(&tv_even, (struct timezone *)NULL);
3791                 timersub(&tv_even, &tv_odd, &tv_delta);
3792                 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
3793                         re_initialize();
3794                         goto restart;
3795                 }
3796                 compute_average(ODD_COUNTERS);
3797                 format_all_counters(ODD_COUNTERS);
3798                 flush_output_stdout();
3799                 if (exit_requested)
3800                         break;
3801                 if (num_iterations && ++done_iters >= num_iterations)
3802                         break;
3803         }
3804 }
3805
3806 void check_dev_msr()
3807 {
3808         struct stat sb;
3809         char pathname[32];
3810
3811         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3812         if (stat(pathname, &sb))
3813                 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
3814                         err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
3815 }
3816
3817 /*
3818  * check for CAP_SYS_RAWIO
3819  * return 0 on success
3820  * return 1 on fail
3821  */
3822 int check_for_cap_sys_rawio(void)
3823 {
3824         cap_t caps;
3825         cap_flag_value_t cap_flag_value;
3826
3827         caps = cap_get_proc();
3828         if (caps == NULL)
3829                 err(-6, "cap_get_proc\n");
3830
3831         if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
3832                 err(-6, "cap_get\n");
3833
3834         if (cap_flag_value != CAP_SET) {
3835                 warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
3836                 return 1;
3837         }
3838
3839         if (cap_free(caps) == -1)
3840                 err(-6, "cap_free\n");
3841
3842         return 0;
3843 }
3844
3845 void check_permissions(void)
3846 {
3847         int do_exit = 0;
3848         char pathname[32];
3849
3850         /* check for CAP_SYS_RAWIO */
3851         do_exit += check_for_cap_sys_rawio();
3852
3853         /* test file permissions */
3854         sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
3855         if (euidaccess(pathname, R_OK)) {
3856                 do_exit++;
3857                 warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
3858         }
3859
3860         /* if all else fails, thell them to be root */
3861         if (do_exit)
3862                 if (getuid() != 0)
3863                         warnx("... or simply run as root");
3864
3865         if (do_exit)
3866                 exit(-6);
3867 }
3868
3869 /*
3870  * NHM adds support for additional MSRs:
3871  *
3872  * MSR_SMI_COUNT                   0x00000034
3873  *
3874  * MSR_PLATFORM_INFO               0x000000ce
3875  * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
3876  *
3877  * MSR_MISC_PWR_MGMT               0x000001aa
3878  *
3879  * MSR_PKG_C3_RESIDENCY            0x000003f8
3880  * MSR_PKG_C6_RESIDENCY            0x000003f9
3881  * MSR_CORE_C3_RESIDENCY           0x000003fc
3882  * MSR_CORE_C6_RESIDENCY           0x000003fd
3883  *
3884  * Side effect:
3885  * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
3886  * sets has_misc_feature_control
3887  */
3888 int probe_nhm_msrs(unsigned int family, unsigned int model)
3889 {
3890         unsigned long long msr;
3891         unsigned int base_ratio;
3892         int *pkg_cstate_limits;
3893
3894         if (!genuine_intel)
3895                 return 0;
3896
3897         if (family != 6)
3898                 return 0;
3899
3900         bclk = discover_bclk(family, model);
3901
3902         switch (model) {
3903         case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
3904         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
3905                 pkg_cstate_limits = nhm_pkg_cstate_limits;
3906                 break;
3907         case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
3908         case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
3909         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
3910         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
3911                 pkg_cstate_limits = snb_pkg_cstate_limits;
3912                 has_misc_feature_control = 1;
3913                 break;
3914         case INTEL_FAM6_HASWELL:        /* HSW */
3915         case INTEL_FAM6_HASWELL_G:      /* HSW */
3916         case INTEL_FAM6_HASWELL_X:      /* HSX */
3917         case INTEL_FAM6_HASWELL_L:      /* HSW */
3918         case INTEL_FAM6_BROADWELL:      /* BDW */
3919         case INTEL_FAM6_BROADWELL_G:    /* BDW */
3920         case INTEL_FAM6_BROADWELL_X:    /* BDX */
3921         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
3922         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
3923                 pkg_cstate_limits = hsw_pkg_cstate_limits;
3924                 has_misc_feature_control = 1;
3925                 break;
3926         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
3927         case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
3928                 pkg_cstate_limits = skx_pkg_cstate_limits;
3929                 has_misc_feature_control = 1;
3930                 break;
3931         case INTEL_FAM6_ICELAKE_X:      /* ICX */
3932                 pkg_cstate_limits = icx_pkg_cstate_limits;
3933                 has_misc_feature_control = 1;
3934                 break;
3935         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
3936                 no_MSR_MISC_PWR_MGMT = 1;
3937                 /* FALLTHRU */
3938         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
3939                 pkg_cstate_limits = slv_pkg_cstate_limits;
3940                 break;
3941         case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
3942                 pkg_cstate_limits = amt_pkg_cstate_limits;
3943                 no_MSR_MISC_PWR_MGMT = 1;
3944                 break;
3945         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
3946                 pkg_cstate_limits = phi_pkg_cstate_limits;
3947                 break;
3948         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
3949         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3950         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
3951         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
3952         case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
3953                 pkg_cstate_limits = glm_pkg_cstate_limits;
3954                 break;
3955         default:
3956                 return 0;
3957         }
3958         get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
3959         pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
3960
3961         get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
3962         base_ratio = (msr >> 8) & 0xFF;
3963
3964         base_hz = base_ratio * bclk * 1000000;
3965         has_base_hz = 1;
3966         return 1;
3967 }
3968
3969 /*
3970  * SLV client has support for unique MSRs:
3971  *
3972  * MSR_CC6_DEMOTION_POLICY_CONFIG
3973  * MSR_MC6_DEMOTION_POLICY_CONFIG
3974  */
3975
3976 int has_slv_msrs(unsigned int family, unsigned int model)
3977 {
3978         if (!genuine_intel)
3979                 return 0;
3980
3981         if (family != 6)
3982                 return 0;
3983
3984         switch (model) {
3985         case INTEL_FAM6_ATOM_SILVERMONT:
3986                 return 1;
3987         }
3988         return 0;
3989 }
3990
3991 int is_dnv(unsigned int family, unsigned int model)
3992 {
3993
3994         if (!genuine_intel)
3995                 return 0;
3996
3997         if (family != 6)
3998                 return 0;
3999
4000         switch (model) {
4001         case INTEL_FAM6_ATOM_GOLDMONT_D:
4002                 return 1;
4003         }
4004         return 0;
4005 }
4006
4007 int is_bdx(unsigned int family, unsigned int model)
4008 {
4009
4010         if (!genuine_intel)
4011                 return 0;
4012
4013         if (family != 6)
4014                 return 0;
4015
4016         switch (model) {
4017         case INTEL_FAM6_BROADWELL_X:
4018                 return 1;
4019         }
4020         return 0;
4021 }
4022
4023 int is_skx(unsigned int family, unsigned int model)
4024 {
4025
4026         if (!genuine_intel)
4027                 return 0;
4028
4029         if (family != 6)
4030                 return 0;
4031
4032         switch (model) {
4033         case INTEL_FAM6_SKYLAKE_X:
4034                 return 1;
4035         }
4036         return 0;
4037 }
4038
4039 int is_icx(unsigned int family, unsigned int model)
4040 {
4041
4042         if (!genuine_intel)
4043                 return 0;
4044
4045         if (family != 6)
4046                 return 0;
4047
4048         switch (model) {
4049         case INTEL_FAM6_ICELAKE_X:
4050                 return 1;
4051         }
4052         return 0;
4053 }
4054
4055 int is_spr(unsigned int family, unsigned int model)
4056 {
4057
4058         if (!genuine_intel)
4059                 return 0;
4060
4061         if (family != 6)
4062                 return 0;
4063
4064         switch (model) {
4065         case INTEL_FAM6_SAPPHIRERAPIDS_X:
4066                 return 1;
4067         }
4068         return 0;
4069 }
4070
4071 int is_ehl(unsigned int family, unsigned int model)
4072 {
4073         if (!genuine_intel)
4074                 return 0;
4075
4076         if (family != 6)
4077                 return 0;
4078
4079         switch (model) {
4080         case INTEL_FAM6_ATOM_TREMONT:
4081                 return 1;
4082         }
4083         return 0;
4084 }
4085
4086 int is_jvl(unsigned int family, unsigned int model)
4087 {
4088         if (!genuine_intel)
4089                 return 0;
4090
4091         if (family != 6)
4092                 return 0;
4093
4094         switch (model) {
4095         case INTEL_FAM6_ATOM_TREMONT_D:
4096                 return 1;
4097         }
4098         return 0;
4099 }
4100
4101 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
4102 {
4103         if (has_slv_msrs(family, model))
4104                 return 0;
4105
4106         if (family != 6)
4107                 return 0;
4108
4109         switch (model) {
4110                 /* Nehalem compatible, but do not include turbo-ratio limit support */
4111         case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
4112         case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
4113                 return 0;
4114         default:
4115                 return 1;
4116         }
4117 }
4118
/*
 * has_atom_turbo_ratio_limit()
 * Returns 1 exactly when has_slv_msrs() reports true for this
 * family/model, 0 otherwise.
 */
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
	return has_slv_msrs(family, model) ? 1 : 0;
}
4126
4127 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
4128 {
4129         if (!genuine_intel)
4130                 return 0;
4131
4132         if (family != 6)
4133                 return 0;
4134
4135         switch (model) {
4136         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
4137         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
4138                 return 1;
4139         default:
4140                 return 0;
4141         }
4142 }
4143
4144 int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
4145 {
4146         if (!genuine_intel)
4147                 return 0;
4148
4149         if (family != 6)
4150                 return 0;
4151
4152         switch (model) {
4153         case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
4154                 return 1;
4155         default:
4156                 return 0;
4157         }
4158 }
4159
4160 int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
4161 {
4162         if (!genuine_intel)
4163                 return 0;
4164
4165         if (family != 6)
4166                 return 0;
4167
4168         switch (model) {
4169         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
4170                 return 1;
4171         default:
4172                 return 0;
4173         }
4174 }
4175
4176 int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
4177 {
4178         if (!genuine_intel)
4179                 return 0;
4180
4181         if (family != 6)
4182                 return 0;
4183
4184         switch (model) {
4185         case INTEL_FAM6_ATOM_GOLDMONT:
4186         case INTEL_FAM6_SKYLAKE_X:
4187         case INTEL_FAM6_ICELAKE_X:
4188         case INTEL_FAM6_SAPPHIRERAPIDS_X:
4189                 return 1;
4190         default:
4191                 return 0;
4192         }
4193 }
4194
4195 int has_config_tdp(unsigned int family, unsigned int model)
4196 {
4197         if (!genuine_intel)
4198                 return 0;
4199
4200         if (family != 6)
4201                 return 0;
4202
4203         switch (model) {
4204         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
4205         case INTEL_FAM6_HASWELL:        /* HSW */
4206         case INTEL_FAM6_HASWELL_X:      /* HSX */
4207         case INTEL_FAM6_HASWELL_L:      /* HSW */
4208         case INTEL_FAM6_HASWELL_G:      /* HSW */
4209         case INTEL_FAM6_BROADWELL:      /* BDW */
4210         case INTEL_FAM6_BROADWELL_G:    /* BDW */
4211         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4212         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4213         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4214         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4215         case INTEL_FAM6_ICELAKE_X:      /* ICX */
4216         case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
4217         case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
4218                 return 1;
4219         default:
4220                 return 0;
4221         }
4222 }
4223
4224 /*
4225  * tcc_offset_bits:
4226  * 0: Tcc Offset not supported (Default)
4227  * 6: Bit 29:24 of MSR_PLATFORM_INFO
4228  * 4: Bit 27:24 of MSR_PLATFORM_INFO
4229  */
4230 void check_tcc_offset(int model)
4231 {
4232         unsigned long long msr;
4233
4234         if (!genuine_intel)
4235                 return;
4236
4237         switch (model) {
4238         case INTEL_FAM6_SKYLAKE_L:
4239         case INTEL_FAM6_CANNONLAKE_L:
4240                 if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
4241                         msr = (msr >> 30) & 1;
4242                         if (msr)
4243                                 tcc_offset_bits = 6;
4244                 }
4245                 return;
4246         default:
4247                 return;
4248         }
4249 }
4250
/*
 * remove_underbar()
 * Delete every '_' character from the NUL-terminated string s, in place.
 * The remaining characters keep their relative order.
 */
static void remove_underbar(char *s)
{
	const char *src;
	char *dst = s;

	for (src = s; *src != '\0'; src++) {
		if (*src == '_')
			continue;
		*dst++ = *src;
	}

	*dst = '\0';
}
4263
4264 static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
4265 {
4266         if (!has_turbo)
4267                 return;
4268
4269         if (has_hsw_turbo_ratio_limit(family, model))
4270                 dump_hsw_turbo_ratio_limits();
4271
4272         if (has_ivt_turbo_ratio_limit(family, model))
4273                 dump_ivt_turbo_ratio_limits();
4274
4275         if (has_turbo_ratio_limit(family, model)) {
4276                 dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
4277
4278                 if (is_hybrid)
4279                         dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
4280         }
4281
4282         if (has_atom_turbo_ratio_limit(family, model))
4283                 dump_atom_turbo_ratio_limits();
4284
4285         if (has_knl_turbo_ratio_limit(family, model))
4286                 dump_knl_turbo_ratio_limits();
4287
4288         if (has_config_tdp(family, model))
4289                 dump_config_tdp();
4290 }
4291
4292 static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
4293 {
4294         if (!do_nhm_platform_info)
4295                 return;
4296
4297         dump_nhm_platform_info();
4298         dump_turbo_ratio_info(family, model);
4299         dump_nhm_cst_cfg();
4300 }
4301
4302 static int read_sysfs_int(char *path)
4303 {
4304         FILE *input;
4305         int retval = -1;
4306
4307         input = fopen(path, "r");
4308         if (input == NULL) {
4309                 if (debug)
4310                         fprintf(outf, "NSFOD %s\n", path);
4311                 return (-1);
4312         }
4313         if (fscanf(input, "%d", &retval) != 1)
4314                 err(1, "%s: failed to read int from file", path);
4315         fclose(input);
4316
4317         return (retval);
4318 }
4319
4320 static void dump_sysfs_file(char *path)
4321 {
4322         FILE *input;
4323         char cpuidle_buf[64];
4324
4325         input = fopen(path, "r");
4326         if (input == NULL) {
4327                 if (debug)
4328                         fprintf(outf, "NSFOD %s\n", path);
4329                 return;
4330         }
4331         if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
4332                 err(1, "%s: failed to read file", path);
4333         fclose(input);
4334
4335         fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
4336 }
4337
4338 static void intel_uncore_frequency_probe(void)
4339 {
4340         int i, j;
4341         char path[128];
4342
4343         if (!genuine_intel)
4344                 return;
4345
4346         if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
4347                 return;
4348
4349         /* Cluster level sysfs not supported yet. */
4350         if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
4351                 return;
4352
4353         if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
4354                 BIC_PRESENT(BIC_UNCORE_MHZ);
4355
4356         if (quiet)
4357                 return;
4358
4359         for (i = 0; i < topo.num_packages; ++i) {
4360                 for (j = 0; j < topo.num_die; ++j) {
4361                         int k, l;
4362
4363                         sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
4364                                 i, j);
4365                         k = read_sysfs_int(path);
4366                         sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
4367                                 i, j);
4368                         l = read_sysfs_int(path);
4369                         fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
4370
4371                         sprintf(path,
4372                                 "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
4373                                 i, j);
4374                         k = read_sysfs_int(path);
4375                         sprintf(path,
4376                                 "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
4377                                 i, j);
4378                         l = read_sysfs_int(path);
4379                         fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
4380                 }
4381         }
4382 }
4383
4384 static void dump_sysfs_cstate_config(void)
4385 {
4386         char path[64];
4387         char name_buf[16];
4388         char desc[64];
4389         FILE *input;
4390         int state;
4391         char *sp;
4392
4393         if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
4394                 fprintf(outf, "cpuidle not loaded\n");
4395                 return;
4396         }
4397
4398         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
4399         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
4400         dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
4401
4402         for (state = 0; state < 10; ++state) {
4403
4404                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
4405                 input = fopen(path, "r");
4406                 if (input == NULL)
4407                         continue;
4408                 if (!fgets(name_buf, sizeof(name_buf), input))
4409                         err(1, "%s: failed to read file", path);
4410
4411                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
4412                 sp = strchr(name_buf, '-');
4413                 if (!sp)
4414                         sp = strchrnul(name_buf, '\n');
4415                 *sp = '\0';
4416                 fclose(input);
4417
4418                 remove_underbar(name_buf);
4419
4420                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
4421                 input = fopen(path, "r");
4422                 if (input == NULL)
4423                         continue;
4424                 if (!fgets(desc, sizeof(desc), input))
4425                         err(1, "%s: failed to read file", path);
4426
4427                 fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
4428                 fclose(input);
4429         }
4430 }
4431
4432 static void dump_sysfs_pstate_config(void)
4433 {
4434         char path[64];
4435         char driver_buf[64];
4436         char governor_buf[64];
4437         FILE *input;
4438         int turbo;
4439
4440         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
4441         input = fopen(path, "r");
4442         if (input == NULL) {
4443                 fprintf(outf, "NSFOD %s\n", path);
4444                 return;
4445         }
4446         if (!fgets(driver_buf, sizeof(driver_buf), input))
4447                 err(1, "%s: failed to read file", path);
4448         fclose(input);
4449
4450         sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
4451         input = fopen(path, "r");
4452         if (input == NULL) {
4453                 fprintf(outf, "NSFOD %s\n", path);
4454                 return;
4455         }
4456         if (!fgets(governor_buf, sizeof(governor_buf), input))
4457                 err(1, "%s: failed to read file", path);
4458         fclose(input);
4459
4460         fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
4461         fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
4462
4463         sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
4464         input = fopen(path, "r");
4465         if (input != NULL) {
4466                 if (fscanf(input, "%d", &turbo) != 1)
4467                         err(1, "%s: failed to parse number from file", path);
4468                 fprintf(outf, "cpufreq boost: %d\n", turbo);
4469                 fclose(input);
4470         }
4471
4472         sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
4473         input = fopen(path, "r");
4474         if (input != NULL) {
4475                 if (fscanf(input, "%d", &turbo) != 1)
4476                         err(1, "%s: failed to parse number from file", path);
4477                 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
4478                 fclose(input);
4479         }
4480 }
4481
4482 /*
4483  * print_epb()
4484  * Decode the ENERGY_PERF_BIAS MSR
4485  */
4486 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4487 {
4488         char *epb_string;
4489         int cpu, epb;
4490
4491         UNUSED(c);
4492         UNUSED(p);
4493
4494         if (!has_epb)
4495                 return 0;
4496
4497         cpu = t->cpu_id;
4498
4499         /* EPB is per-package */
4500         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4501                 return 0;
4502
4503         if (cpu_migrate(cpu)) {
4504                 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
4505                 return -1;
4506         }
4507
4508         epb = get_epb(cpu);
4509         if (epb < 0)
4510                 return 0;
4511
4512         switch (epb) {
4513         case ENERGY_PERF_BIAS_PERFORMANCE:
4514                 epb_string = "performance";
4515                 break;
4516         case ENERGY_PERF_BIAS_NORMAL:
4517                 epb_string = "balanced";
4518                 break;
4519         case ENERGY_PERF_BIAS_POWERSAVE:
4520                 epb_string = "powersave";
4521                 break;
4522         default:
4523                 epb_string = "custom";
4524                 break;
4525         }
4526         fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
4527
4528         return 0;
4529 }
4530
4531 /*
4532  * print_hwp()
4533  * Decode the MSR_HWP_CAPABILITIES
4534  */
4535 int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4536 {
4537         unsigned long long msr;
4538         int cpu;
4539
4540         UNUSED(c);
4541         UNUSED(p);
4542
4543         if (!has_hwp)
4544                 return 0;
4545
4546         cpu = t->cpu_id;
4547
4548         /* MSR_HWP_CAPABILITIES is per-package */
4549         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4550                 return 0;
4551
4552         if (cpu_migrate(cpu)) {
4553                 fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
4554                 return -1;
4555         }
4556
4557         if (get_msr(cpu, MSR_PM_ENABLE, &msr))
4558                 return 0;
4559
4560         fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
4561
4562         /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
4563         if ((msr & (1 << 0)) == 0)
4564                 return 0;
4565
4566         if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
4567                 return 0;
4568
4569         fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
4570                 "(high %d guar %d eff %d low %d)\n",
4571                 cpu, msr,
4572                 (unsigned int)HWP_HIGHEST_PERF(msr),
4573                 (unsigned int)HWP_GUARANTEED_PERF(msr),
4574                 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
4575
4576         if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
4577                 return 0;
4578
4579         fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
4580                 "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
4581                 cpu, msr,
4582                 (unsigned int)(((msr) >> 0) & 0xff),
4583                 (unsigned int)(((msr) >> 8) & 0xff),
4584                 (unsigned int)(((msr) >> 16) & 0xff),
4585                 (unsigned int)(((msr) >> 24) & 0xff),
4586                 (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
4587
4588         if (has_hwp_pkg) {
4589                 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
4590                         return 0;
4591
4592                 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
4593                         "(min %d max %d des %d epp 0x%x window 0x%x)\n",
4594                         cpu, msr,
4595                         (unsigned int)(((msr) >> 0) & 0xff),
4596                         (unsigned int)(((msr) >> 8) & 0xff),
4597                         (unsigned int)(((msr) >> 16) & 0xff),
4598                         (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
4599         }
4600         if (has_hwp_notify) {
4601                 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
4602                         return 0;
4603
4604                 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
4605                         "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
4606                         cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
4607         }
4608         if (get_msr(cpu, MSR_HWP_STATUS, &msr))
4609                 return 0;
4610
4611         fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
4612                 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
4613                 cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
4614
4615         return 0;
4616 }
4617
4618 /*
4619  * print_perf_limit()
4620  */
4621 int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
4622 {
4623         unsigned long long msr;
4624         int cpu;
4625
4626         UNUSED(c);
4627         UNUSED(p);
4628
4629         cpu = t->cpu_id;
4630
4631         /* per-package */
4632         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
4633                 return 0;
4634
4635         if (cpu_migrate(cpu)) {
4636                 fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
4637                 return -1;
4638         }
4639
4640         if (do_core_perf_limit_reasons) {
4641                 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
4642                 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4643                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
4644                         (msr & 1 << 15) ? "bit15, " : "",
4645                         (msr & 1 << 14) ? "bit14, " : "",
4646                         (msr & 1 << 13) ? "Transitions, " : "",
4647                         (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
4648                         (msr & 1 << 11) ? "PkgPwrL2, " : "",
4649                         (msr & 1 << 10) ? "PkgPwrL1, " : "",
4650                         (msr & 1 << 9) ? "CorePwr, " : "",
4651                         (msr & 1 << 8) ? "Amps, " : "",
4652                         (msr & 1 << 6) ? "VR-Therm, " : "",
4653                         (msr & 1 << 5) ? "Auto-HWP, " : "",
4654                         (msr & 1 << 4) ? "Graphics, " : "",
4655                         (msr & 1 << 2) ? "bit2, " : "",
4656                         (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
4657                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
4658                         (msr & 1 << 31) ? "bit31, " : "",
4659                         (msr & 1 << 30) ? "bit30, " : "",
4660                         (msr & 1 << 29) ? "Transitions, " : "",
4661                         (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
4662                         (msr & 1 << 27) ? "PkgPwrL2, " : "",
4663                         (msr & 1 << 26) ? "PkgPwrL1, " : "",
4664                         (msr & 1 << 25) ? "CorePwr, " : "",
4665                         (msr & 1 << 24) ? "Amps, " : "",
4666                         (msr & 1 << 22) ? "VR-Therm, " : "",
4667                         (msr & 1 << 21) ? "Auto-HWP, " : "",
4668                         (msr & 1 << 20) ? "Graphics, " : "",
4669                         (msr & 1 << 18) ? "bit18, " : "",
4670                         (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
4671
4672         }
4673         if (do_gfx_perf_limit_reasons) {
4674                 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
4675                 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4676                 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
4677                         (msr & 1 << 0) ? "PROCHOT, " : "",
4678                         (msr & 1 << 1) ? "ThermStatus, " : "",
4679                         (msr & 1 << 4) ? "Graphics, " : "",
4680                         (msr & 1 << 6) ? "VR-Therm, " : "",
4681                         (msr & 1 << 8) ? "Amps, " : "",
4682                         (msr & 1 << 9) ? "GFXPwr, " : "",
4683                         (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4684                 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
4685                         (msr & 1 << 16) ? "PROCHOT, " : "",
4686                         (msr & 1 << 17) ? "ThermStatus, " : "",
4687                         (msr & 1 << 20) ? "Graphics, " : "",
4688                         (msr & 1 << 22) ? "VR-Therm, " : "",
4689                         (msr & 1 << 24) ? "Amps, " : "",
4690                         (msr & 1 << 25) ? "GFXPwr, " : "",
4691                         (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4692         }
4693         if (do_ring_perf_limit_reasons) {
4694                 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
4695                 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
4696                 fprintf(outf, " (Active: %s%s%s%s%s%s)",
4697                         (msr & 1 << 0) ? "PROCHOT, " : "",
4698                         (msr & 1 << 1) ? "ThermStatus, " : "",
4699                         (msr & 1 << 6) ? "VR-Therm, " : "",
4700                         (msr & 1 << 8) ? "Amps, " : "",
4701                         (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
4702                 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
4703                         (msr & 1 << 16) ? "PROCHOT, " : "",
4704                         (msr & 1 << 17) ? "ThermStatus, " : "",
4705                         (msr & 1 << 22) ? "VR-Therm, " : "",
4706                         (msr & 1 << 24) ? "Amps, " : "",
4707                         (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
4708         }
4709         return 0;
4710 }
4711
4712 #define RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
4713 #define RAPL_TIME_GRANULARITY   0x3F    /* 6 bit time granularity */
4714
4715 double get_tdp_intel(unsigned int model)
4716 {
4717         unsigned long long msr;
4718
4719         if (do_rapl & RAPL_PKG_POWER_INFO)
4720                 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
4721                         return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
4722
4723         switch (model) {
4724         case INTEL_FAM6_ATOM_SILVERMONT:
4725         case INTEL_FAM6_ATOM_SILVERMONT_D:
4726                 return 30.0;
4727         default:
4728                 return 135.0;
4729         }
4730 }
4731
/*
 * get_tdp_amd()
 * Returns a fixed value; the family argument is ignored.
 */
double get_tdp_amd(unsigned int family)
{
	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
	const double fam17h_max_stock_tdp = 280.0;

	UNUSED(family);

	return fam17h_max_stock_tdp;
}
4739
4740 /*
4741  * rapl_dram_energy_units_probe()
4742  * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
4743  */
4744 static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
4745 {
4746         /* only called for genuine_intel, family 6 */
4747
4748         switch (model) {
4749         case INTEL_FAM6_HASWELL_X:      /* HSX */
4750         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4751         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4752         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4753         case INTEL_FAM6_ICELAKE_X:      /* ICX */
4754                 return (rapl_dram_energy_units = 15.3 / 1000000);
4755         default:
4756                 return (rapl_energy_units);
4757         }
4758 }
4759
4760 void rapl_probe_intel(unsigned int family, unsigned int model)
4761 {
4762         unsigned long long msr;
4763         unsigned int time_unit;
4764         double tdp;
4765
4766         if (family != 6)
4767                 return;
4768
4769         switch (model) {
4770         case INTEL_FAM6_SANDYBRIDGE:
4771         case INTEL_FAM6_IVYBRIDGE:
4772         case INTEL_FAM6_HASWELL:        /* HSW */
4773         case INTEL_FAM6_HASWELL_L:      /* HSW */
4774         case INTEL_FAM6_HASWELL_G:      /* HSW */
4775         case INTEL_FAM6_BROADWELL:      /* BDW */
4776         case INTEL_FAM6_BROADWELL_G:    /* BDW */
4777                 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
4778                 if (rapl_joules) {
4779                         BIC_PRESENT(BIC_Pkg_J);
4780                         BIC_PRESENT(BIC_Cor_J);
4781                         BIC_PRESENT(BIC_GFX_J);
4782                 } else {
4783                         BIC_PRESENT(BIC_PkgWatt);
4784                         BIC_PRESENT(BIC_CorWatt);
4785                         BIC_PRESENT(BIC_GFXWatt);
4786                 }
4787                 break;
4788         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
4789         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4790                 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
4791                 if (rapl_joules)
4792                         BIC_PRESENT(BIC_Pkg_J);
4793                 else
4794                         BIC_PRESENT(BIC_PkgWatt);
4795                 break;
4796         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
4797                 do_rapl =
4798                     RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4799                     | RAPL_GFX | RAPL_PKG_POWER_INFO;
4800                 if (rapl_joules) {
4801                         BIC_PRESENT(BIC_Pkg_J);
4802                         BIC_PRESENT(BIC_Cor_J);
4803                         BIC_PRESENT(BIC_RAM_J);
4804                         BIC_PRESENT(BIC_GFX_J);
4805                 } else {
4806                         BIC_PRESENT(BIC_PkgWatt);
4807                         BIC_PRESENT(BIC_CorWatt);
4808                         BIC_PRESENT(BIC_RAMWatt);
4809                         BIC_PRESENT(BIC_GFXWatt);
4810                 }
4811                 break;
4812         case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
4813                 do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
4814                 BIC_PRESENT(BIC_PKG__);
4815                 if (rapl_joules)
4816                         BIC_PRESENT(BIC_Pkg_J);
4817                 else
4818                         BIC_PRESENT(BIC_PkgWatt);
4819                 break;
4820         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
4821         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
4822                 do_rapl =
4823                     RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
4824                     | RAPL_GFX | RAPL_PKG_POWER_INFO;
4825                 BIC_PRESENT(BIC_PKG__);
4826                 BIC_PRESENT(BIC_RAM__);
4827                 if (rapl_joules) {
4828                         BIC_PRESENT(BIC_Pkg_J);
4829                         BIC_PRESENT(BIC_Cor_J);
4830                         BIC_PRESENT(BIC_RAM_J);
4831                         BIC_PRESENT(BIC_GFX_J);
4832                 } else {
4833                         BIC_PRESENT(BIC_PkgWatt);
4834                         BIC_PRESENT(BIC_CorWatt);
4835                         BIC_PRESENT(BIC_RAMWatt);
4836                         BIC_PRESENT(BIC_GFXWatt);
4837                 }
4838                 break;
4839         case INTEL_FAM6_HASWELL_X:      /* HSX */
4840         case INTEL_FAM6_BROADWELL_X:    /* BDX */
4841         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
4842         case INTEL_FAM6_ICELAKE_X:      /* ICX */
4843         case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
4844         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
4845                 do_rapl =
4846                     RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4847                     RAPL_PKG_POWER_INFO;
4848                 BIC_PRESENT(BIC_PKG__);
4849                 BIC_PRESENT(BIC_RAM__);
4850                 if (rapl_joules) {
4851                         BIC_PRESENT(BIC_Pkg_J);
4852                         BIC_PRESENT(BIC_RAM_J);
4853                 } else {
4854                         BIC_PRESENT(BIC_PkgWatt);
4855                         BIC_PRESENT(BIC_RAMWatt);
4856                 }
4857                 break;
4858         case INTEL_FAM6_SANDYBRIDGE_X:
4859         case INTEL_FAM6_IVYBRIDGE_X:
4860                 do_rapl =
4861                     RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
4862                     RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
4863                 BIC_PRESENT(BIC_PKG__);
4864                 BIC_PRESENT(BIC_RAM__);
4865                 if (rapl_joules) {
4866                         BIC_PRESENT(BIC_Pkg_J);
4867                         BIC_PRESENT(BIC_Cor_J);
4868                         BIC_PRESENT(BIC_RAM_J);
4869                 } else {
4870                         BIC_PRESENT(BIC_PkgWatt);
4871                         BIC_PRESENT(BIC_CorWatt);
4872                         BIC_PRESENT(BIC_RAMWatt);
4873                 }
4874                 break;
4875         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
4876         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
4877                 do_rapl = RAPL_PKG | RAPL_CORES;
4878                 if (rapl_joules) {
4879                         BIC_PRESENT(BIC_Pkg_J);
4880                         BIC_PRESENT(BIC_Cor_J);
4881                 } else {
4882                         BIC_PRESENT(BIC_PkgWatt);
4883                         BIC_PRESENT(BIC_CorWatt);
4884                 }
4885                 break;
4886         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
4887                 do_rapl =
4888                     RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
4889                     RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
4890                 BIC_PRESENT(BIC_PKG__);
4891                 BIC_PRESENT(BIC_RAM__);
4892                 if (rapl_joules) {
4893                         BIC_PRESENT(BIC_Pkg_J);
4894                         BIC_PRESENT(BIC_Cor_J);
4895                         BIC_PRESENT(BIC_RAM_J);
4896                 } else {
4897                         BIC_PRESENT(BIC_PkgWatt);
4898                         BIC_PRESENT(BIC_CorWatt);
4899                         BIC_PRESENT(BIC_RAMWatt);
4900                 }
4901                 break;
4902         default:
4903                 return;
4904         }
4905
4906         /* units on package 0, verify later other packages match */
4907         if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
4908                 return;
4909
4910         rapl_power_units = 1.0 / (1 << (msr & 0xF));
4911         if (model == INTEL_FAM6_ATOM_SILVERMONT)
4912                 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
4913         else
4914                 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
4915
4916         rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
4917
4918         time_unit = msr >> 16 & 0xF;
4919         if (time_unit == 0)
4920                 time_unit = 0xA;
4921
4922         rapl_time_units = 1.0 / (1 << (time_unit));
4923
4924         tdp = get_tdp_intel(model);
4925
4926         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4927         if (!quiet)
4928                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4929 }
4930
4931 void rapl_probe_amd(unsigned int family, unsigned int model)
4932 {
4933         unsigned long long msr;
4934         unsigned int eax, ebx, ecx, edx;
4935         unsigned int has_rapl = 0;
4936         double tdp;
4937
4938         UNUSED(model);
4939
4940         if (max_extended_level >= 0x80000007) {
4941                 __cpuid(0x80000007, eax, ebx, ecx, edx);
4942                 /* RAPL (Fam 17h+) */
4943                 has_rapl = edx & (1 << 14);
4944         }
4945
4946         if (!has_rapl || family < 0x17)
4947                 return;
4948
4949         do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
4950         if (rapl_joules) {
4951                 BIC_PRESENT(BIC_Pkg_J);
4952                 BIC_PRESENT(BIC_Cor_J);
4953         } else {
4954                 BIC_PRESENT(BIC_PkgWatt);
4955                 BIC_PRESENT(BIC_CorWatt);
4956         }
4957
4958         if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
4959                 return;
4960
4961         rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
4962         rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
4963         rapl_power_units = ldexp(1.0, -(msr & 0xf));
4964
4965         tdp = get_tdp_amd(family);
4966
4967         rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
4968         if (!quiet)
4969                 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
4970 }
4971
4972 /*
4973  * rapl_probe()
4974  *
4975  * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
4976  */
4977 void rapl_probe(unsigned int family, unsigned int model)
4978 {
4979         if (genuine_intel)
4980                 rapl_probe_intel(family, model);
4981         if (authentic_amd || hygon_genuine)
4982                 rapl_probe_amd(family, model);
4983 }
4984
4985 void perf_limit_reasons_probe(unsigned int family, unsigned int model)
4986 {
4987         if (!genuine_intel)
4988                 return;
4989
4990         if (family != 6)
4991                 return;
4992
4993         switch (model) {
4994         case INTEL_FAM6_HASWELL:        /* HSW */
4995         case INTEL_FAM6_HASWELL_L:      /* HSW */
4996         case INTEL_FAM6_HASWELL_G:      /* HSW */
4997                 do_gfx_perf_limit_reasons = 1;
4998                 /* FALLTHRU */
4999         case INTEL_FAM6_HASWELL_X:      /* HSX */
5000                 do_core_perf_limit_reasons = 1;
5001                 do_ring_perf_limit_reasons = 1;
5002         default:
5003                 return;
5004         }
5005 }
5006
5007 void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
5008 {
5009         if (family != 6)
5010                 return;
5011
5012         switch (model) {
5013         case INTEL_FAM6_BROADWELL_X:
5014         case INTEL_FAM6_SKYLAKE_X:
5015                 has_automatic_cstate_conversion = 1;
5016         }
5017 }
5018
5019 void prewake_cstate_probe(unsigned int family, unsigned int model)
5020 {
5021         if (is_icx(family, model) || is_spr(family, model))
5022                 dis_cstate_prewake = 1;
5023 }
5024
5025 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5026 {
5027         unsigned long long msr;
5028         unsigned int dts, dts2;
5029         int cpu;
5030
5031         UNUSED(c);
5032         UNUSED(p);
5033
5034         if (!(do_dts || do_ptm))
5035                 return 0;
5036
5037         cpu = t->cpu_id;
5038
5039         /* DTS is per-core, no need to print for each thread */
5040         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
5041                 return 0;
5042
5043         if (cpu_migrate(cpu)) {
5044                 fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
5045                 return -1;
5046         }
5047
5048         if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
5049                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
5050                         return 0;
5051
5052                 dts = (msr >> 16) & 0x7F;
5053                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
5054
5055                 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
5056                         return 0;
5057
5058                 dts = (msr >> 16) & 0x7F;
5059                 dts2 = (msr >> 8) & 0x7F;
5060                 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
5061                         cpu, msr, tj_max - dts, tj_max - dts2);
5062         }
5063
5064         if (do_dts && debug) {
5065                 unsigned int resolution;
5066
5067                 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
5068                         return 0;
5069
5070                 dts = (msr >> 16) & 0x7F;
5071                 resolution = (msr >> 27) & 0xF;
5072                 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
5073                         cpu, msr, tj_max - dts, resolution);
5074
5075                 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
5076                         return 0;
5077
5078                 dts = (msr >> 16) & 0x7F;
5079                 dts2 = (msr >> 8) & 0x7F;
5080                 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
5081                         cpu, msr, tj_max - dts, tj_max - dts2);
5082         }
5083
5084         return 0;
5085 }
5086
5087 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
5088 {
5089         fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
5090                 cpu, label,
5091                 ((msr >> 15) & 1) ? "EN" : "DIS",
5092                 ((msr >> 0) & 0x7FFF) * rapl_power_units,
5093                 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
5094                 (((msr >> 16) & 1) ? "EN" : "DIS"));
5095
5096         return;
5097 }
5098
5099 int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5100 {
5101         unsigned long long msr;
5102         const char *msr_name;
5103         int cpu;
5104
5105         UNUSED(c);
5106         UNUSED(p);
5107
5108         if (!do_rapl)
5109                 return 0;
5110
5111         /* RAPL counters are per package, so print only for 1st thread/package */
5112         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
5113                 return 0;
5114
5115         cpu = t->cpu_id;
5116         if (cpu_migrate(cpu)) {
5117                 fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
5118                 return -1;
5119         }
5120
5121         if (do_rapl & RAPL_AMD_F17H) {
5122                 msr_name = "MSR_RAPL_PWR_UNIT";
5123                 if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
5124                         return -1;
5125         } else {
5126                 msr_name = "MSR_RAPL_POWER_UNIT";
5127                 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
5128                         return -1;
5129         }
5130
5131         fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
5132                 rapl_power_units, rapl_energy_units, rapl_time_units);
5133
5134         if (do_rapl & RAPL_PKG_POWER_INFO) {
5135
5136                 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
5137                         return -5;
5138
5139                 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
5140                         cpu, msr,
5141                         ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5142                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5143                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5144                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
5145
5146         }
5147         if (do_rapl & RAPL_PKG) {
5148
5149                 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
5150                         return -9;
5151
5152                 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
5153                         cpu, msr, (msr >> 63) & 1 ? "" : "UN");
5154
5155                 print_power_limit_msr(cpu, msr, "PKG Limit #1");
5156                 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
5157                         cpu,
5158                         ((msr >> 47) & 1) ? "EN" : "DIS",
5159                         ((msr >> 32) & 0x7FFF) * rapl_power_units,
5160                         (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
5161                         ((msr >> 48) & 1) ? "EN" : "DIS");
5162
5163                 if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
5164                         return -9;
5165
5166                 fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
5167                 fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
5168                         cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
5169         }
5170
5171         if (do_rapl & RAPL_DRAM_POWER_INFO) {
5172                 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
5173                         return -6;
5174
5175                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
5176                         cpu, msr,
5177                         ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5178                         ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5179                         ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
5180                         ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
5181         }
5182         if (do_rapl & RAPL_DRAM) {
5183                 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
5184                         return -9;
5185                 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
5186                         cpu, msr, (msr >> 31) & 1 ? "" : "UN");
5187
5188                 print_power_limit_msr(cpu, msr, "DRAM Limit");
5189         }
5190         if (do_rapl & RAPL_CORE_POLICY) {
5191                 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
5192                         return -7;
5193
5194                 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
5195         }
5196         if (do_rapl & RAPL_CORES_POWER_LIMIT) {
5197                 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
5198                         return -9;
5199                 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
5200                         cpu, msr, (msr >> 31) & 1 ? "" : "UN");
5201                 print_power_limit_msr(cpu, msr, "Cores Limit");
5202         }
5203         if (do_rapl & RAPL_GFX) {
5204                 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
5205                         return -8;
5206
5207                 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
5208
5209                 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
5210                         return -9;
5211                 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
5212                         cpu, msr, (msr >> 31) & 1 ? "" : "UN");
5213                 print_power_limit_msr(cpu, msr, "GFX Limit");
5214         }
5215         return 0;
5216 }
5217
5218 /*
5219  * SNB adds support for additional MSRs:
5220  *
5221  * MSR_PKG_C7_RESIDENCY            0x000003fa
5222  * MSR_CORE_C7_RESIDENCY           0x000003fe
5223  * MSR_PKG_C2_RESIDENCY            0x0000060d
5224  */
5225
5226 int has_snb_msrs(unsigned int family, unsigned int model)
5227 {
5228         if (!genuine_intel)
5229                 return 0;
5230
5231         if (family != 6)
5232                 return 0;
5233
5234         switch (model) {
5235         case INTEL_FAM6_SANDYBRIDGE:
5236         case INTEL_FAM6_SANDYBRIDGE_X:
5237         case INTEL_FAM6_IVYBRIDGE:      /* IVB */
5238         case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
5239         case INTEL_FAM6_HASWELL:        /* HSW */
5240         case INTEL_FAM6_HASWELL_X:      /* HSW */
5241         case INTEL_FAM6_HASWELL_L:      /* HSW */
5242         case INTEL_FAM6_HASWELL_G:      /* HSW */
5243         case INTEL_FAM6_BROADWELL:      /* BDW */
5244         case INTEL_FAM6_BROADWELL_G:    /* BDW */
5245         case INTEL_FAM6_BROADWELL_X:    /* BDX */
5246         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
5247         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5248         case INTEL_FAM6_SKYLAKE_X:      /* SKX */
5249         case INTEL_FAM6_ICELAKE_X:      /* ICX */
5250         case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
5251         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5252         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5253         case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
5254         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
5255         case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
5256                 return 1;
5257         }
5258         return 0;
5259 }
5260
5261 /*
5262  * HSW ULT added support for C8/C9/C10 MSRs:
5263  *
5264  * MSR_PKG_C8_RESIDENCY         0x00000630
5265  * MSR_PKG_C9_RESIDENCY         0x00000631
5266  * MSR_PKG_C10_RESIDENCY        0x00000632
5267  *
5268  * MSR_PKGC8_IRTL               0x00000633
5269  * MSR_PKGC9_IRTL               0x00000634
5270  * MSR_PKGC10_IRTL              0x00000635
5271  *
5272  */
5273 int has_c8910_msrs(unsigned int family, unsigned int model)
5274 {
5275         if (!genuine_intel)
5276                 return 0;
5277
5278         if (family != 6)
5279                 return 0;
5280
5281         switch (model) {
5282         case INTEL_FAM6_HASWELL_L:      /* HSW */
5283         case INTEL_FAM6_BROADWELL:      /* BDW */
5284         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
5285         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5286         case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5287         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5288         case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
5289                 return 1;
5290         }
5291         return 0;
5292 }
5293
5294 /*
5295  * SKL adds support for additional MSRS:
5296  *
5297  * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
5298  * MSR_PKG_ANY_CORE_C0_RES         0x00000659
5299  * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
5300  * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
5301  */
5302 int has_skl_msrs(unsigned int family, unsigned int model)
5303 {
5304         if (!genuine_intel)
5305                 return 0;
5306
5307         if (family != 6)
5308                 return 0;
5309
5310         switch (model) {
5311         case INTEL_FAM6_SKYLAKE_L:      /* SKL */
5312         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5313                 return 1;
5314         }
5315         return 0;
5316 }
5317
5318 int is_slm(unsigned int family, unsigned int model)
5319 {
5320         if (!genuine_intel)
5321                 return 0;
5322
5323         if (family != 6)
5324                 return 0;
5325
5326         switch (model) {
5327         case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
5328         case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
5329                 return 1;
5330         }
5331         return 0;
5332 }
5333
5334 int is_knl(unsigned int family, unsigned int model)
5335 {
5336         if (!genuine_intel)
5337                 return 0;
5338
5339         if (family != 6)
5340                 return 0;
5341
5342         switch (model) {
5343         case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
5344                 return 1;
5345         }
5346         return 0;
5347 }
5348
5349 int is_cnl(unsigned int family, unsigned int model)
5350 {
5351         if (!genuine_intel)
5352                 return 0;
5353
5354         if (family != 6)
5355                 return 0;
5356
5357         switch (model) {
5358         case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
5359                 return 1;
5360         }
5361
5362         return 0;
5363 }
5364
/*
 * Returns the APERF/MPERF multiplier: 1024 when is_knl() matches, otherwise 1.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
        return is_knl(family, model) ? 1024 : 1;
}
5371
5372 #define SLM_BCLK_FREQS 5
5373 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
5374
5375 double slm_bclk(void)
5376 {
5377         unsigned long long msr = 3;
5378         unsigned int i;
5379         double freq;
5380
5381         if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
5382                 fprintf(outf, "SLM BCLK: unknown\n");
5383
5384         i = msr & 0xf;
5385         if (i >= SLM_BCLK_FREQS) {
5386                 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
5387                 i = 3;
5388         }
5389         freq = slm_freq_table[i];
5390
5391         if (!quiet)
5392                 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
5393
5394         return freq;
5395 }
5396
/*
 * Return the bus clock in MHz for this CPU:
 * 100.00 when has_snb_msrs() or is_knl() matches,
 * the slm_bclk() result when is_slm() matches,
 * and 133.33 for everything else.
 */
double discover_bclk(unsigned int family, unsigned int model)
{
        if (has_snb_msrs(family, model) || is_knl(family, model))
                return 100.00;

        if (is_slm(family, model))
                return slm_bclk();

        return 133.33;
}
5406
5407 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5408 {
5409         unsigned int eax, ebx, ecx, edx;
5410
5411         UNUSED(c);
5412         UNUSED(p);
5413
5414         if (!genuine_intel)
5415                 return 0;
5416
5417         if (cpu_migrate(t->cpu_id)) {
5418                 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
5419                 return -1;
5420         }
5421
5422         if (max_level < 0x1a)
5423                 return 0;
5424
5425         __cpuid(0x1a, eax, ebx, ecx, edx);
5426         eax = (eax >> 24) & 0xFF;
5427         if (eax == 0x20)
5428                 t->is_atom = true;
5429         return 0;
5430 }
5431
5432 /*
5433  * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
5434  * the Thermal Control Circuit (TCC) activates.
5435  * This is usually equal to tjMax.
5436  *
5437  * Older processors do not have this MSR, so there we guess,
5438  * but also allow cmdline over-ride with -T.
5439  *
5440  * Several MSR temperature values are in units of degrees-C
5441  * below this value, including the Digital Thermal Sensor (DTS),
5442  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
5443  */
5444 int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
5445 {
5446         unsigned long long msr;
5447         unsigned int tcc_default, tcc_offset;
5448         int cpu;
5449
5450         UNUSED(c);
5451         UNUSED(p);
5452
5453         /* tj_max is used only for dts or ptm */
5454         if (!(do_dts || do_ptm))
5455                 return 0;
5456
5457         /* this is a per-package concept */
5458         if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
5459                 return 0;
5460
5461         cpu = t->cpu_id;
5462         if (cpu_migrate(cpu)) {
5463                 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
5464                 return -1;
5465         }
5466
5467         if (tj_max_override != 0) {
5468                 tj_max = tj_max_override;
5469                 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
5470                 return 0;
5471         }
5472
5473         /* Temperature Target MSR is Nehalem and newer only */
5474         if (!do_nhm_platform_info)
5475                 goto guess;
5476
5477         if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
5478                 goto guess;
5479
5480         tcc_default = (msr >> 16) & 0xFF;
5481
5482         if (!quiet) {
5483                 switch (tcc_offset_bits) {
5484                 case 4:
5485                         tcc_offset = (msr >> 24) & 0xF;
5486                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5487                                 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5488                         break;
5489                 case 6:
5490                         tcc_offset = (msr >> 24) & 0x3F;
5491                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
5492                                 cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
5493                         break;
5494                 default:
5495                         fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
5496                         break;
5497                 }
5498         }
5499
5500         if (!tcc_default)
5501                 goto guess;
5502
5503         tj_max = tcc_default;
5504
5505         return 0;
5506
5507 guess:
5508         tj_max = TJMAX_DEFAULT;
5509         fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
5510
5511         return 0;
5512 }
5513
5514 void decode_feature_control_msr(void)
5515 {
5516         unsigned long long msr;
5517
5518         if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
5519                 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
5520                         base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
5521 }
5522
5523 void decode_misc_enable_msr(void)
5524 {
5525         unsigned long long msr;
5526
5527         if (!genuine_intel)
5528                 return;
5529
5530         if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
5531                 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
5532                         base_cpu, msr,
5533                         msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
5534                         msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
5535                         msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
5536                         msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
5537                         msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
5538 }
5539
5540 void decode_misc_feature_control(void)
5541 {
5542         unsigned long long msr;
5543
5544         if (!has_misc_feature_control)
5545                 return;
5546
5547         if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
5548                 fprintf(outf,
5549                         "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
5550                         base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
5551                         msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
5552 }
5553
5554 /*
5555  * Decode MSR_MISC_PWR_MGMT
5556  *
5557  * Decode the bits according to the Nehalem documentation
5558  * bit[0] seems to continue to have same meaning going forward
5559  * bit[1] less so...
5560  */
5561 void decode_misc_pwr_mgmt_msr(void)
5562 {
5563         unsigned long long msr;
5564
5565         if (!do_nhm_platform_info)
5566                 return;
5567
5568         if (no_MSR_MISC_PWR_MGMT)
5569                 return;
5570
5571         if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
5572                 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
5573                         base_cpu, msr,
5574                         msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
5575 }
5576
5577 /*
5578  * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
5579  *
5580  * This MSRs are present on Silvermont processors,
5581  * Intel Atom processor E3000 series (Baytrail), and friends.
5582  */
5583 void decode_c6_demotion_policy_msr(void)
5584 {
5585         unsigned long long msr;
5586
5587         if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
5588                 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
5589                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5590
5591         if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
5592                 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
5593                         base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
5594 }
5595
5596 /*
5597  * When models are the same, for the purpose of turbostat, reuse
5598  */
5599 unsigned int intel_model_duplicates(unsigned int model)
5600 {
5601
5602         switch (model) {
5603         case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
5604         case INTEL_FAM6_NEHALEM_G:      /* Core i7 and i5 Processor - Nehalem */
5605         case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
5606         case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
5607                 return INTEL_FAM6_NEHALEM;
5608
5609         case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
5610                 return INTEL_FAM6_NEHALEM_EX;
5611
5612         case INTEL_FAM6_XEON_PHI_KNM:
5613                 return INTEL_FAM6_XEON_PHI_KNL;
5614
5615         case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
5616                 return INTEL_FAM6_BROADWELL_X;
5617
5618         case INTEL_FAM6_SKYLAKE:
5619         case INTEL_FAM6_KABYLAKE_L:
5620         case INTEL_FAM6_KABYLAKE:
5621         case INTEL_FAM6_COMETLAKE_L:
5622         case INTEL_FAM6_COMETLAKE:
5623                 return INTEL_FAM6_SKYLAKE_L;
5624
5625         case INTEL_FAM6_ICELAKE_L:
5626         case INTEL_FAM6_ICELAKE_NNPI:
5627         case INTEL_FAM6_TIGERLAKE_L:
5628         case INTEL_FAM6_TIGERLAKE:
5629         case INTEL_FAM6_ROCKETLAKE:
5630         case INTEL_FAM6_LAKEFIELD:
5631         case INTEL_FAM6_ALDERLAKE:
5632         case INTEL_FAM6_ALDERLAKE_L:
5633         case INTEL_FAM6_ATOM_GRACEMONT:
5634         case INTEL_FAM6_RAPTORLAKE:
5635         case INTEL_FAM6_RAPTORLAKE_P:
5636         case INTEL_FAM6_RAPTORLAKE_S:
5637         case INTEL_FAM6_METEORLAKE:
5638         case INTEL_FAM6_METEORLAKE_L:
5639                 return INTEL_FAM6_CANNONLAKE_L;
5640
5641         case INTEL_FAM6_ATOM_TREMONT_L:
5642                 return INTEL_FAM6_ATOM_TREMONT;
5643
5644         case INTEL_FAM6_ICELAKE_D:
5645                 return INTEL_FAM6_ICELAKE_X;
5646
5647         case INTEL_FAM6_EMERALDRAPIDS_X:
5648                 return INTEL_FAM6_SAPPHIRERAPIDS_X;
5649         }
5650         return model;
5651 }
5652
5653 void print_dev_latency(void)
5654 {
5655         char *path = "/dev/cpu_dma_latency";
5656         int fd;
5657         int value;
5658         int retval;
5659
5660         fd = open(path, O_RDONLY);
5661         if (fd < 0) {
5662                 warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
5663                 return;
5664         }
5665
5666         retval = read(fd, (void *)&value, sizeof(int));
5667         if (retval != sizeof(int)) {
5668                 warn("read failed %s", path);
5669                 close(fd);
5670                 return;
5671         }
5672         fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
5673
5674         close(fd);
5675 }
5676
5677 /*
5678  * Linux-perf manages the HW instructions-retired counter
5679  * by enabling when requested, and hiding rollover
5680  */
5681 void linux_perf_init(void)
5682 {
5683         if (!BIC_IS_ENABLED(BIC_IPC))
5684                 return;
5685
5686         if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
5687                 return;
5688
5689         fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
5690         if (fd_instr_count_percpu == NULL)
5691                 err(-1, "calloc fd_instr_count_percpu");
5692
5693         BIC_PRESENT(BIC_IPC);
5694 }
5695
5696 void process_cpuid()
5697 {
5698         unsigned int eax, ebx, ecx, edx;
5699         unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
5700         unsigned long long ucode_patch = 0;
5701
5702         eax = ebx = ecx = edx = 0;
5703
5704         __cpuid(0, max_level, ebx, ecx, edx);
5705
5706         if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
5707                 genuine_intel = 1;
5708         else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
5709                 authentic_amd = 1;
5710         else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
5711                 hygon_genuine = 1;
5712
5713         if (!quiet)
5714                 fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
5715                         (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
5716
5717         __cpuid(1, fms, ebx, ecx, edx);
5718         family = (fms >> 8) & 0xf;
5719         model = (fms >> 4) & 0xf;
5720         stepping = fms & 0xf;
5721         if (family == 0xf)
5722                 family += (fms >> 20) & 0xff;
5723         if (family >= 6)
5724                 model += ((fms >> 16) & 0xf) << 4;
5725         ecx_flags = ecx;
5726         edx_flags = edx;
5727
5728         if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
5729                 warnx("get_msr(UCODE)");
5730
5731         /*
5732          * check max extended function levels of CPUID.
5733          * This is needed to check for invariant TSC.
5734          * This check is valid for both Intel and AMD.
5735          */
5736         ebx = ecx = edx = 0;
5737         __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
5738
5739         if (!quiet) {
5740                 fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
5741                         family, model, stepping, family, model, stepping,
5742                         (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
5743                 fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
5744                 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
5745                         ecx_flags & (1 << 0) ? "SSE3" : "-",
5746                         ecx_flags & (1 << 3) ? "MONITOR" : "-",
5747                         ecx_flags & (1 << 6) ? "SMX" : "-",
5748                         ecx_flags & (1 << 7) ? "EIST" : "-",
5749                         ecx_flags & (1 << 8) ? "TM2" : "-",
5750                         edx_flags & (1 << 4) ? "TSC" : "-",
5751                         edx_flags & (1 << 5) ? "MSR" : "-",
5752                         edx_flags & (1 << 22) ? "ACPI-TM" : "-",
5753                         edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
5754         }
5755
5756         probe_platform_features(family, model);
5757         if (genuine_intel)
5758                 model = intel_model_duplicates(model);
5759
5760         if (!(edx_flags & (1 << 5)))
5761                 errx(1, "CPUID: no MSR");
5762
5763         if (max_extended_level >= 0x80000007) {
5764
5765                 /*
5766                  * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
5767                  * this check is valid for both Intel and AMD
5768                  */
5769                 __cpuid(0x80000007, eax, ebx, ecx, edx);
5770                 has_invariant_tsc = edx & (1 << 8);
5771         }
5772
5773         /*
5774          * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
5775          * this check is valid for both Intel and AMD
5776          */
5777
5778         __cpuid(0x6, eax, ebx, ecx, edx);
5779         has_aperf = ecx & (1 << 0);
5780         if (has_aperf) {
5781                 BIC_PRESENT(BIC_Avg_MHz);
5782                 BIC_PRESENT(BIC_Busy);
5783                 BIC_PRESENT(BIC_Bzy_MHz);
5784         }
5785         do_dts = eax & (1 << 0);
5786         if (do_dts)
5787                 BIC_PRESENT(BIC_CoreTmp);
5788         has_turbo = eax & (1 << 1);
5789         do_ptm = eax & (1 << 6);
5790         if (do_ptm)
5791                 BIC_PRESENT(BIC_PkgTmp);
5792         has_hwp = eax & (1 << 7);
5793         has_hwp_notify = eax & (1 << 8);
5794         has_hwp_activity_window = eax & (1 << 9);
5795         has_hwp_epp = eax & (1 << 10);
5796         has_hwp_pkg = eax & (1 << 11);
5797         has_epb = ecx & (1 << 3);
5798
5799         if (!quiet)
5800                 fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
5801                         "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
5802                         has_aperf ? "" : "No-",
5803                         has_turbo ? "" : "No-",
5804                         do_dts ? "" : "No-",
5805                         do_ptm ? "" : "No-",
5806                         has_hwp ? "" : "No-",
5807                         has_hwp_notify ? "" : "No-",
5808                         has_hwp_activity_window ? "" : "No-",
5809                         has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
5810
5811         if (!quiet)
5812                 decode_misc_enable_msr();
5813
5814         if (max_level >= 0x7 && !quiet) {
5815                 int has_sgx;
5816
5817                 ecx = 0;
5818
5819                 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
5820
5821                 has_sgx = ebx & (1 << 2);
5822
5823                 is_hybrid = edx & (1 << 15);
5824
5825                 fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");
5826
5827                 if (has_sgx)
5828                         decode_feature_control_msr();
5829         }
5830
5831         if (max_level >= 0x15) {
5832                 unsigned int eax_crystal;
5833                 unsigned int ebx_tsc;
5834
5835                 /*
5836                  * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
5837                  */
5838                 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
5839                 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
5840
5841                 if (ebx_tsc != 0) {
5842
5843                         if (!quiet && (ebx != 0))
5844                                 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
5845                                         eax_crystal, ebx_tsc, crystal_hz);
5846
5847                         if (crystal_hz == 0)
5848                                 switch (model) {
5849                                 case INTEL_FAM6_SKYLAKE_L:      /* SKL */
5850                                         crystal_hz = 24000000;  /* 24.0 MHz */
5851                                         break;
5852                                 case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
5853                                         crystal_hz = 25000000;  /* 25.0 MHz */
5854                                         break;
5855                                 case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
5856                                 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
5857                                         crystal_hz = 19200000;  /* 19.2 MHz */
5858                                         break;
5859                                 default:
5860                                         crystal_hz = 0;
5861                                 }
5862
5863                         if (crystal_hz) {
5864                                 tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
5865                                 if (!quiet)
5866                                         fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
5867                                                 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
5868                         }
5869                 }
5870         }
5871         if (max_level >= 0x16) {
5872                 unsigned int base_mhz, max_mhz, bus_mhz, edx;
5873
5874                 /*
5875                  * CPUID 16H Base MHz, Max MHz, Bus MHz
5876                  */
5877                 base_mhz = max_mhz = bus_mhz = edx = 0;
5878
5879                 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
5880                 if (!quiet)
5881                         fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
5882                                 base_mhz, max_mhz, bus_mhz);
5883         }
5884
5885         if (has_aperf)
5886                 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
5887
5888         BIC_PRESENT(BIC_IRQ);
5889         BIC_PRESENT(BIC_TSC_MHz);
5890
5891         if (probe_nhm_msrs(family, model)) {
5892                 do_nhm_platform_info = 1;
5893                 BIC_PRESENT(BIC_CPU_c1);
5894                 BIC_PRESENT(BIC_CPU_c3);
5895                 BIC_PRESENT(BIC_CPU_c6);
5896                 BIC_PRESENT(BIC_SMI);
5897         }
5898         do_snb_cstates = has_snb_msrs(family, model);
5899
5900         if (do_snb_cstates)
5901                 BIC_PRESENT(BIC_CPU_c7);
5902
5903         do_irtl_snb = has_snb_msrs(family, model);
5904         if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
5905                 BIC_PRESENT(BIC_Pkgpc2);
5906         if (pkg_cstate_limit >= PCL__3)
5907                 BIC_PRESENT(BIC_Pkgpc3);
5908         if (pkg_cstate_limit >= PCL__6)
5909                 BIC_PRESENT(BIC_Pkgpc6);
5910         if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
5911                 BIC_PRESENT(BIC_Pkgpc7);
5912         if (has_slv_msrs(family, model)) {
5913                 BIC_NOT_PRESENT(BIC_Pkgpc2);
5914                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5915                 BIC_PRESENT(BIC_Pkgpc6);
5916                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5917                 BIC_PRESENT(BIC_Mod_c6);
5918                 use_c1_residency_msr = 1;
5919         }
5920         if (is_jvl(family, model)) {
5921                 BIC_NOT_PRESENT(BIC_CPU_c3);
5922                 BIC_NOT_PRESENT(BIC_CPU_c7);
5923                 BIC_NOT_PRESENT(BIC_Pkgpc2);
5924                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5925                 BIC_NOT_PRESENT(BIC_Pkgpc6);
5926                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5927         }
5928         if (is_dnv(family, model)) {
5929                 BIC_PRESENT(BIC_CPU_c1);
5930                 BIC_NOT_PRESENT(BIC_CPU_c3);
5931                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5932                 BIC_NOT_PRESENT(BIC_CPU_c7);
5933                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5934                 use_c1_residency_msr = 1;
5935         }
5936         if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
5937                 BIC_NOT_PRESENT(BIC_CPU_c3);
5938                 BIC_NOT_PRESENT(BIC_Pkgpc3);
5939                 BIC_NOT_PRESENT(BIC_CPU_c7);
5940                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5941         }
5942         if (is_bdx(family, model)) {
5943                 BIC_NOT_PRESENT(BIC_CPU_c7);
5944                 BIC_NOT_PRESENT(BIC_Pkgpc7);
5945         }
5946         if (has_c8910_msrs(family, model)) {
5947                 if (pkg_cstate_limit >= PCL__8)
5948                         BIC_PRESENT(BIC_Pkgpc8);
5949                 if (pkg_cstate_limit >= PCL__9)
5950                         BIC_PRESENT(BIC_Pkgpc9);
5951                 if (pkg_cstate_limit >= PCL_10)
5952                         BIC_PRESENT(BIC_Pkgpc10);
5953         }
5954         do_irtl_hsw = has_c8910_msrs(family, model);
5955         if (has_skl_msrs(family, model)) {
5956                 BIC_PRESENT(BIC_Totl_c0);
5957                 BIC_PRESENT(BIC_Any_c0);
5958                 BIC_PRESENT(BIC_GFX_c0);
5959                 BIC_PRESENT(BIC_CPUGFX);
5960         }
5961         do_slm_cstates = is_slm(family, model);
5962         do_knl_cstates = is_knl(family, model);
5963
5964         if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
5965                 BIC_NOT_PRESENT(BIC_CPU_c3);
5966
5967         if (!quiet)
5968                 decode_misc_pwr_mgmt_msr();
5969
5970         if (!quiet && has_slv_msrs(family, model))
5971                 decode_c6_demotion_policy_msr();
5972
5973         rapl_probe(family, model);
5974         perf_limit_reasons_probe(family, model);
5975         automatic_cstate_conversion_probe(family, model);
5976         prewake_cstate_probe(family, model);
5977
5978         check_tcc_offset(model);
5979
5980         if (!quiet)
5981                 dump_cstate_pstate_config_info(family, model);
5982         intel_uncore_frequency_probe();
5983
5984         if (!quiet)
5985                 print_dev_latency();
5986         if (!quiet)
5987                 dump_sysfs_cstate_config();
5988         if (!quiet)
5989                 dump_sysfs_pstate_config();
5990
5991         if (has_skl_msrs(family, model) || is_ehl(family, model))
5992                 calculate_tsc_tweak();
5993
5994         if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
5995                 BIC_PRESENT(BIC_GFX_rc6);
5996
5997         if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
5998             !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
5999                 BIC_PRESENT(BIC_GFXMHz);
6000
6001         if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
6002             !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
6003                 BIC_PRESENT(BIC_GFXACTMHz);
6004
6005         if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
6006                 BIC_PRESENT(BIC_CPU_LPI);
6007         else
6008                 BIC_NOT_PRESENT(BIC_CPU_LPI);
6009
6010         if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
6011                 BIC_PRESENT(BIC_CORE_THROT_CNT);
6012         else
6013                 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
6014
6015         if (!access(sys_lpi_file_sysfs, R_OK)) {
6016                 sys_lpi_file = sys_lpi_file_sysfs;
6017                 BIC_PRESENT(BIC_SYS_LPI);
6018         } else if (!access(sys_lpi_file_debugfs, R_OK)) {
6019                 sys_lpi_file = sys_lpi_file_debugfs;
6020                 BIC_PRESENT(BIC_SYS_LPI);
6021         } else {
6022                 sys_lpi_file_sysfs = NULL;
6023                 BIC_NOT_PRESENT(BIC_SYS_LPI);
6024         }
6025
6026         if (!quiet)
6027                 decode_misc_feature_control();
6028
6029         return;
6030 }
6031
/*
 * in /dev/cpu/ return success for names that are numbers
 * ie. filter out ".", "..", "microcode".
 *
 * Returns 1 when the first character of the entry name is a
 * decimal digit, 0 otherwise.
 */
int dir_filter(const struct dirent *dirp)
{
        /* <ctype.h> classifiers are only defined for unsigned char values and EOF */
        return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
6043
6044 void topology_probe()
6045 {
6046         int i;
6047         int max_core_id = 0;
6048         int max_package_id = 0;
6049         int max_die_id = 0;
6050         int max_siblings = 0;
6051
6052         /* Initialize num_cpus, max_cpu_num */
6053         set_max_cpu_num();
6054         topo.num_cpus = 0;
6055         for_all_proc_cpus(count_cpus);
6056         if (!summary_only && topo.num_cpus > 1)
6057                 BIC_PRESENT(BIC_CPU);
6058
6059         if (debug > 1)
6060                 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
6061
6062         cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
6063         if (cpus == NULL)
6064                 err(1, "calloc cpus");
6065
6066         /*
6067          * Allocate and initialize cpu_present_set
6068          */
6069         cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
6070         if (cpu_present_set == NULL)
6071                 err(3, "CPU_ALLOC");
6072         cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
6073         CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
6074         for_all_proc_cpus(mark_cpu_present);
6075
6076         /*
6077          * Validate that all cpus in cpu_subset are also in cpu_present_set
6078          */
6079         for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
6080                 if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
6081                         if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
6082                                 err(1, "cpu%d not present", i);
6083         }
6084
6085         /*
6086          * Allocate and initialize cpu_affinity_set
6087          */
6088         cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
6089         if (cpu_affinity_set == NULL)
6090                 err(3, "CPU_ALLOC");
6091         cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
6092         CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
6093
6094         for_all_proc_cpus(init_thread_id);
6095
6096         /*
6097          * For online cpus
6098          * find max_core_id, max_package_id
6099          */
6100         for (i = 0; i <= topo.max_cpu_num; ++i) {
6101                 int siblings;
6102
6103                 if (cpu_is_not_present(i)) {
6104                         if (debug > 1)
6105                                 fprintf(outf, "cpu%d NOT PRESENT\n", i);
6106                         continue;
6107                 }
6108
6109                 cpus[i].logical_cpu_id = i;
6110
6111                 /* get package information */
6112                 cpus[i].physical_package_id = get_physical_package_id(i);
6113                 if (cpus[i].physical_package_id > max_package_id)
6114                         max_package_id = cpus[i].physical_package_id;
6115
6116                 /* get die information */
6117                 cpus[i].die_id = get_die_id(i);
6118                 if (cpus[i].die_id > max_die_id)
6119                         max_die_id = cpus[i].die_id;
6120
6121                 /* get numa node information */
6122                 cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
6123                 if (cpus[i].physical_node_id > topo.max_node_num)
6124                         topo.max_node_num = cpus[i].physical_node_id;
6125
6126                 /* get core information */
6127                 cpus[i].physical_core_id = get_core_id(i);
6128                 if (cpus[i].physical_core_id > max_core_id)
6129                         max_core_id = cpus[i].physical_core_id;
6130
6131                 /* get thread information */
6132                 siblings = get_thread_siblings(&cpus[i]);
6133                 if (siblings > max_siblings)
6134                         max_siblings = siblings;
6135                 if (cpus[i].thread_id == 0)
6136                         topo.num_cores++;
6137         }
6138
6139         topo.cores_per_node = max_core_id + 1;
6140         if (debug > 1)
6141                 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
6142         if (!summary_only && topo.cores_per_node > 1)
6143                 BIC_PRESENT(BIC_Core);
6144
6145         topo.num_die = max_die_id + 1;
6146         if (debug > 1)
6147                 fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
6148         if (!summary_only && topo.num_die > 1)
6149                 BIC_PRESENT(BIC_Die);
6150
6151         topo.num_packages = max_package_id + 1;
6152         if (debug > 1)
6153                 fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
6154         if (!summary_only && topo.num_packages > 1)
6155                 BIC_PRESENT(BIC_Package);
6156
6157         set_node_data();
6158         if (debug > 1)
6159                 fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
6160         if (!summary_only && topo.nodes_per_pkg > 1)
6161                 BIC_PRESENT(BIC_Node);
6162
6163         topo.threads_per_core = max_siblings;
6164         if (debug > 1)
6165                 fprintf(outf, "max_siblings %d\n", max_siblings);
6166
6167         if (debug < 1)
6168                 return;
6169
6170         for (i = 0; i <= topo.max_cpu_num; ++i) {
6171                 if (cpu_is_not_present(i))
6172                         continue;
6173                 fprintf(outf,
6174                         "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
6175                         i, cpus[i].physical_package_id, cpus[i].die_id,
6176                         cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
6177         }
6178
6179 }
6180
6181 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
6182 {
6183         int i;
6184         int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
6185         int num_threads = topo.threads_per_core * num_cores;
6186
6187         *t = calloc(num_threads, sizeof(struct thread_data));
6188         if (*t == NULL)
6189                 goto error;
6190
6191         for (i = 0; i < num_threads; i++)
6192                 (*t)[i].cpu_id = -1;
6193
6194         *c = calloc(num_cores, sizeof(struct core_data));
6195         if (*c == NULL)
6196                 goto error;
6197
6198         for (i = 0; i < num_cores; i++)
6199                 (*c)[i].core_id = -1;
6200
6201         *p = calloc(topo.num_packages, sizeof(struct pkg_data));
6202         if (*p == NULL)
6203                 goto error;
6204
6205         for (i = 0; i < topo.num_packages; i++)
6206                 (*p)[i].package_id = i;
6207
6208         return;
6209 error:
6210         err(1, "calloc counters");
6211 }
6212
6213 /*
6214  * init_counter()
6215  *
6216  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
6217  */
6218 void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
6219 {
6220         int pkg_id = cpus[cpu_id].physical_package_id;
6221         int node_id = cpus[cpu_id].logical_node_id;
6222         int core_id = cpus[cpu_id].physical_core_id;
6223         int thread_id = cpus[cpu_id].thread_id;
6224         struct thread_data *t;
6225         struct core_data *c;
6226         struct pkg_data *p;
6227
6228         /* Workaround for systems where physical_node_id==-1
6229          * and logical_node_id==(-1 - topo.num_cpus)
6230          */
6231         if (node_id < 0)
6232                 node_id = 0;
6233
6234         t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
6235         c = GET_CORE(core_base, core_id, node_id, pkg_id);
6236         p = GET_PKG(pkg_base, pkg_id);
6237
6238         t->cpu_id = cpu_id;
6239         if (thread_id == 0) {
6240                 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
6241                 if (cpu_is_first_core_in_package(cpu_id))
6242                         t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
6243         }
6244
6245         c->core_id = core_id;
6246         p->package_id = pkg_id;
6247 }
6248
6249 int initialize_counters(int cpu_id)
6250 {
6251         init_counter(EVEN_COUNTERS, cpu_id);
6252         init_counter(ODD_COUNTERS, cpu_id);
6253         return 0;
6254 }
6255
6256 void allocate_output_buffer()
6257 {
6258         output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
6259         outp = output_buffer;
6260         if (outp == NULL)
6261                 err(-1, "calloc output buffer");
6262 }
6263
6264 void allocate_fd_percpu(void)
6265 {
6266         fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
6267         if (fd_percpu == NULL)
6268                 err(-1, "calloc fd_percpu");
6269 }
6270
6271 void allocate_irq_buffers(void)
6272 {
6273         irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
6274         if (irq_column_2_cpu == NULL)
6275                 err(-1, "calloc %d", topo.num_cpus);
6276
6277         irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
6278         if (irqs_per_cpu == NULL)
6279                 err(-1, "calloc %d", topo.max_cpu_num + 1);
6280 }
6281
6282 void setup_all_buffers(void)
6283 {
6284         topology_probe();
6285         allocate_irq_buffers();
6286         allocate_fd_percpu();
6287         allocate_counters(&thread_even, &core_even, &package_even);
6288         allocate_counters(&thread_odd, &core_odd, &package_odd);
6289         allocate_output_buffer();
6290         for_all_proc_cpus(initialize_counters);
6291 }
6292
6293 void set_base_cpu(void)
6294 {
6295         base_cpu = sched_getcpu();
6296         if (base_cpu < 0)
6297                 err(-ENODEV, "No valid cpus found");
6298
6299         if (debug > 1)
6300                 fprintf(outf, "base_cpu = %d\n", base_cpu);
6301 }
6302
6303 void turbostat_init()
6304 {
6305         setup_all_buffers();
6306         set_base_cpu();
6307         check_dev_msr();
6308         check_permissions();
6309         process_cpuid();
6310         linux_perf_init();
6311
6312         if (!quiet)
6313                 for_all_cpus(print_hwp, ODD_COUNTERS);
6314
6315         if (!quiet)
6316                 for_all_cpus(print_epb, ODD_COUNTERS);
6317
6318         if (!quiet)
6319                 for_all_cpus(print_perf_limit, ODD_COUNTERS);
6320
6321         if (!quiet)
6322                 for_all_cpus(print_rapl, ODD_COUNTERS);
6323
6324         for_all_cpus(set_temperature_target, ODD_COUNTERS);
6325
6326         for_all_cpus(get_cpu_type, ODD_COUNTERS);
6327         for_all_cpus(get_cpu_type, EVEN_COUNTERS);
6328
6329         if (!quiet)
6330                 for_all_cpus(print_thermal, ODD_COUNTERS);
6331
6332         if (!quiet && do_irtl_snb)
6333                 print_irtl();
6334
6335         if (DO_BIC(BIC_IPC))
6336                 (void)get_instr_count_fd(base_cpu);
6337 }
6338
6339 int fork_it(char **argv)
6340 {
6341         pid_t child_pid;
6342         int status;
6343
6344         snapshot_proc_sysfs_files();
6345         status = for_all_cpus(get_counters, EVEN_COUNTERS);
6346         first_counter_read = 0;
6347         if (status)
6348                 exit(status);
6349         /* clear affinity side-effect of get_counters() */
6350         sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
6351         gettimeofday(&tv_even, (struct timezone *)NULL);
6352
6353         child_pid = fork();
6354         if (!child_pid) {
6355                 /* child */
6356                 execvp(argv[0], argv);
6357                 err(errno, "exec %s", argv[0]);
6358         } else {
6359
6360                 /* parent */
6361                 if (child_pid == -1)
6362                         err(1, "fork");
6363
6364                 signal(SIGINT, SIG_IGN);
6365                 signal(SIGQUIT, SIG_IGN);
6366                 if (waitpid(child_pid, &status, 0) == -1)
6367                         err(status, "waitpid");
6368
6369                 if (WIFEXITED(status))
6370                         status = WEXITSTATUS(status);
6371         }
6372         /*
6373          * n.b. fork_it() does not check for errors from for_all_cpus()
6374          * because re-starting is problematic when forking
6375          */
6376         snapshot_proc_sysfs_files();
6377         for_all_cpus(get_counters, ODD_COUNTERS);
6378         gettimeofday(&tv_odd, (struct timezone *)NULL);
6379         timersub(&tv_odd, &tv_even, &tv_delta);
6380         if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
6381                 fprintf(outf, "%s: Counter reset detected\n", progname);
6382         else {
6383                 compute_average(EVEN_COUNTERS);
6384                 format_all_counters(EVEN_COUNTERS);
6385         }
6386
6387         fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
6388
6389         flush_output_stderr();
6390
6391         return status;
6392 }
6393
6394 int get_and_dump_counters(void)
6395 {
6396         int status;
6397
6398         snapshot_proc_sysfs_files();
6399         status = for_all_cpus(get_counters, ODD_COUNTERS);
6400         if (status)
6401                 return status;
6402
6403         status = for_all_cpus(dump_counters, ODD_COUNTERS);
6404         if (status)
6405                 return status;
6406
6407         flush_output_stdout();
6408
6409         return status;
6410 }
6411
6412 void print_version()
6413 {
6414         fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
6415 }
6416
6417 #define COMMAND_LINE_SIZE 2048
6418
6419 void print_bootcmd(void)
6420 {
6421         char bootcmd[COMMAND_LINE_SIZE];
6422         FILE *fp;
6423         int ret;
6424
6425         memset(bootcmd, 0, COMMAND_LINE_SIZE);
6426         fp = fopen("/proc/cmdline", "r");
6427         if (!fp)
6428                 return;
6429
6430         ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
6431         if (ret) {
6432                 bootcmd[ret] = '\0';
6433                 /* the last character is already '\n' */
6434                 fprintf(outf, "Kernel command line: %s", bootcmd);
6435         }
6436
6437         fclose(fp);
6438 }
6439
6440 int add_counter(unsigned int msr_num, char *path, char *name,
6441                 unsigned int width, enum counter_scope scope,
6442                 enum counter_type type, enum counter_format format, int flags)
6443 {
6444         struct msr_counter *msrp;
6445
6446         msrp = calloc(1, sizeof(struct msr_counter));
6447         if (msrp == NULL) {
6448                 perror("calloc");
6449                 exit(1);
6450         }
6451
6452         msrp->msr_num = msr_num;
6453         strncpy(msrp->name, name, NAME_BYTES - 1);
6454         if (path)
6455                 strncpy(msrp->path, path, PATH_BYTES - 1);
6456         msrp->width = width;
6457         msrp->type = type;
6458         msrp->format = format;
6459         msrp->flags = flags;
6460
6461         switch (scope) {
6462
6463         case SCOPE_CPU:
6464                 msrp->next = sys.tp;
6465                 sys.tp = msrp;
6466                 sys.added_thread_counters++;
6467                 if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
6468                         fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS);
6469                         exit(-1);
6470                 }
6471                 break;
6472
6473         case SCOPE_CORE:
6474                 msrp->next = sys.cp;
6475                 sys.cp = msrp;
6476                 sys.added_core_counters++;
6477                 if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
6478                         fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
6479                         exit(-1);
6480                 }
6481                 break;
6482
6483         case SCOPE_PACKAGE:
6484                 msrp->next = sys.pp;
6485                 sys.pp = msrp;
6486                 sys.added_package_counters++;
6487                 if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
6488                         fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
6489                         exit(-1);
6490                 }
6491                 break;
6492         }
6493
6494         return 0;
6495 }
6496
6497 void parse_add_command(char *add_command)
6498 {
6499         int msr_num = 0;
6500         char *path = NULL;
6501         char name_buffer[NAME_BYTES] = "";
6502         int width = 64;
6503         int fail = 0;
6504         enum counter_scope scope = SCOPE_CPU;
6505         enum counter_type type = COUNTER_CYCLES;
6506         enum counter_format format = FORMAT_DELTA;
6507
6508         while (add_command) {
6509
6510                 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
6511                         goto next;
6512
6513                 if (sscanf(add_command, "msr%d", &msr_num) == 1)
6514                         goto next;
6515
6516                 if (*add_command == '/') {
6517                         path = add_command;
6518                         goto next;
6519                 }
6520
6521                 if (sscanf(add_command, "u%d", &width) == 1) {
6522                         if ((width == 32) || (width == 64))
6523                                 goto next;
6524                         width = 64;
6525                 }
6526                 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
6527                         scope = SCOPE_CPU;
6528                         goto next;
6529                 }
6530                 if (!strncmp(add_command, "core", strlen("core"))) {
6531                         scope = SCOPE_CORE;
6532                         goto next;
6533                 }
6534                 if (!strncmp(add_command, "package", strlen("package"))) {
6535                         scope = SCOPE_PACKAGE;
6536                         goto next;
6537                 }
6538                 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
6539                         type = COUNTER_CYCLES;
6540                         goto next;
6541                 }
6542                 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
6543                         type = COUNTER_SECONDS;
6544                         goto next;
6545                 }
6546                 if (!strncmp(add_command, "usec", strlen("usec"))) {
6547                         type = COUNTER_USEC;
6548                         goto next;
6549                 }
6550                 if (!strncmp(add_command, "raw", strlen("raw"))) {
6551                         format = FORMAT_RAW;
6552                         goto next;
6553                 }
6554                 if (!strncmp(add_command, "delta", strlen("delta"))) {
6555                         format = FORMAT_DELTA;
6556                         goto next;
6557                 }
6558                 if (!strncmp(add_command, "percent", strlen("percent"))) {
6559                         format = FORMAT_PERCENT;
6560                         goto next;
6561                 }
6562
6563                 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
6564                         char *eos;
6565
6566                         eos = strchr(name_buffer, ',');
6567                         if (eos)
6568                                 *eos = '\0';
6569                         goto next;
6570                 }
6571
6572 next:
6573                 add_command = strchr(add_command, ',');
6574                 if (add_command) {
6575                         *add_command = '\0';
6576                         add_command++;
6577                 }
6578
6579         }
6580         if ((msr_num == 0) && (path == NULL)) {
6581                 fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
6582                 fail++;
6583         }
6584
6585         /* generate default column header */
6586         if (*name_buffer == '\0') {
6587                 if (width == 32)
6588                         sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6589                 else
6590                         sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
6591         }
6592
6593         if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
6594                 fail++;
6595
6596         if (fail) {
6597                 help();
6598                 exit(1);
6599         }
6600 }
6601
6602 int is_deferred_add(char *name)
6603 {
6604         int i;
6605
6606         for (i = 0; i < deferred_add_index; ++i)
6607                 if (!strcmp(name, deferred_add_names[i]))
6608                         return 1;
6609         return 0;
6610 }
6611
6612 int is_deferred_skip(char *name)
6613 {
6614         int i;
6615
6616         for (i = 0; i < deferred_skip_index; ++i)
6617                 if (!strcmp(name, deferred_skip_names[i]))
6618                         return 1;
6619         return 0;
6620 }
6621
6622 void probe_sysfs(void)
6623 {
6624         char path[64];
6625         char name_buf[16];
6626         FILE *input;
6627         int state;
6628         char *sp;
6629
6630         for (state = 10; state >= 0; --state) {
6631
6632                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6633                 input = fopen(path, "r");
6634                 if (input == NULL)
6635                         continue;
6636                 if (!fgets(name_buf, sizeof(name_buf), input))
6637                         err(1, "%s: failed to read file", path);
6638
6639                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6640                 sp = strchr(name_buf, '-');
6641                 if (!sp)
6642                         sp = strchrnul(name_buf, '\n');
6643                 *sp = '%';
6644                 *(sp + 1) = '\0';
6645
6646                 remove_underbar(name_buf);
6647
6648                 fclose(input);
6649
6650                 sprintf(path, "cpuidle/state%d/time", state);
6651
6652                 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6653                         continue;
6654
6655                 if (is_deferred_skip(name_buf))
6656                         continue;
6657
6658                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
6659         }
6660
6661         for (state = 10; state >= 0; --state) {
6662
6663                 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
6664                 input = fopen(path, "r");
6665                 if (input == NULL)
6666                         continue;
6667                 if (!fgets(name_buf, sizeof(name_buf), input))
6668                         err(1, "%s: failed to read file", path);
6669                 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
6670                 sp = strchr(name_buf, '-');
6671                 if (!sp)
6672                         sp = strchrnul(name_buf, '\n');
6673                 *sp = '\0';
6674                 fclose(input);
6675
6676                 remove_underbar(name_buf);
6677
6678                 sprintf(path, "cpuidle/state%d/usage", state);
6679
6680                 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
6681                         continue;
6682
6683                 if (is_deferred_skip(name_buf))
6684                         continue;
6685
6686                 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
6687         }
6688
6689 }
6690
6691 /*
6692  * parse cpuset with following syntax
6693  * 1,2,4..6,8-10 and set bits in cpu_subset
6694  */
6695 void parse_cpu_command(char *optarg)
6696 {
6697         unsigned int start, end;
6698         char *next;
6699
6700         if (!strcmp(optarg, "core")) {
6701                 if (cpu_subset)
6702                         goto error;
6703                 show_core_only++;
6704                 return;
6705         }
6706         if (!strcmp(optarg, "package")) {
6707                 if (cpu_subset)
6708                         goto error;
6709                 show_pkg_only++;
6710                 return;
6711         }
6712         if (show_core_only || show_pkg_only)
6713                 goto error;
6714
6715         cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
6716         if (cpu_subset == NULL)
6717                 err(3, "CPU_ALLOC");
6718         cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
6719
6720         CPU_ZERO_S(cpu_subset_size, cpu_subset);
6721
6722         next = optarg;
6723
6724         while (next && *next) {
6725
6726                 if (*next == '-')       /* no negative cpu numbers */
6727                         goto error;
6728
6729                 start = strtoul(next, &next, 10);
6730
6731                 if (start >= CPU_SUBSET_MAXCPUS)
6732                         goto error;
6733                 CPU_SET_S(start, cpu_subset_size, cpu_subset);
6734
6735                 if (*next == '\0')
6736                         break;
6737
6738                 if (*next == ',') {
6739                         next += 1;
6740                         continue;
6741                 }
6742
6743                 if (*next == '-') {
6744                         next += 1;      /* start range */
6745                 } else if (*next == '.') {
6746                         next += 1;
6747                         if (*next == '.')
6748                                 next += 1;      /* start range */
6749                         else
6750                                 goto error;
6751                 }
6752
6753                 end = strtoul(next, &next, 10);
6754                 if (end <= start)
6755                         goto error;
6756
6757                 while (++start <= end) {
6758                         if (start >= CPU_SUBSET_MAXCPUS)
6759                                 goto error;
6760                         CPU_SET_S(start, cpu_subset_size, cpu_subset);
6761                 }
6762
6763                 if (*next == ',')
6764                         next += 1;
6765                 else if (*next != '\0')
6766                         goto error;
6767         }
6768
6769         return;
6770
6771 error:
6772         fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
6773         help();
6774         exit(-1);
6775 }
6776
6777 void cmdline(int argc, char **argv)
6778 {
6779         int opt;
6780         int option_index = 0;
6781         static struct option long_options[] = {
6782                 { "add", required_argument, 0, 'a' },
6783                 { "cpu", required_argument, 0, 'c' },
6784                 { "Dump", no_argument, 0, 'D' },
6785                 { "debug", no_argument, 0, 'd' },       /* internal, not documented */
6786                 { "enable", required_argument, 0, 'e' },
6787                 { "interval", required_argument, 0, 'i' },
6788                 { "IPC", no_argument, 0, 'I' },
6789                 { "num_iterations", required_argument, 0, 'n' },
6790                 { "header_iterations", required_argument, 0, 'N' },
6791                 { "help", no_argument, 0, 'h' },
6792                 { "hide", required_argument, 0, 'H' },  // meh, -h taken by --help
6793                 { "Joules", no_argument, 0, 'J' },
6794                 { "list", no_argument, 0, 'l' },
6795                 { "out", required_argument, 0, 'o' },
6796                 { "quiet", no_argument, 0, 'q' },
6797                 { "show", required_argument, 0, 's' },
6798                 { "Summary", no_argument, 0, 'S' },
6799                 { "TCC", required_argument, 0, 'T' },
6800                 { "version", no_argument, 0, 'v' },
6801                 { 0, 0, 0, 0 }
6802         };
6803
6804         progname = argv[0];
6805
6806         while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
6807                 switch (opt) {
6808                 case 'a':
6809                         parse_add_command(optarg);
6810                         break;
6811                 case 'c':
6812                         parse_cpu_command(optarg);
6813                         break;
6814                 case 'D':
6815                         dump_only++;
6816                         break;
6817                 case 'e':
6818                         /* --enable specified counter */
6819                         bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
6820                         break;
6821                 case 'd':
6822                         debug++;
6823                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6824                         break;
6825                 case 'H':
6826                         /*
6827                          * --hide: do not show those specified
6828                          *  multiple invocations simply clear more bits in enabled mask
6829                          */
6830                         bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
6831                         break;
6832                 case 'h':
6833                 default:
6834                         help();
6835                         exit(1);
6836                 case 'i':
6837                         {
6838                                 double interval = strtod(optarg, NULL);
6839
6840                                 if (interval < 0.001) {
6841                                         fprintf(outf, "interval %f seconds is too small\n", interval);
6842                                         exit(2);
6843                                 }
6844
6845                                 interval_tv.tv_sec = interval_ts.tv_sec = interval;
6846                                 interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
6847                                 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
6848                         }
6849                         break;
6850                 case 'J':
6851                         rapl_joules++;
6852                         break;
6853                 case 'l':
6854                         ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
6855                         list_header_only++;
6856                         quiet++;
6857                         break;
6858                 case 'o':
6859                         outf = fopen_or_die(optarg, "w");
6860                         break;
6861                 case 'q':
6862                         quiet = 1;
6863                         break;
6864                 case 'n':
6865                         num_iterations = strtod(optarg, NULL);
6866
6867                         if (num_iterations <= 0) {
6868                                 fprintf(outf, "iterations %d should be positive number\n", num_iterations);
6869                                 exit(2);
6870                         }
6871                         break;
6872                 case 'N':
6873                         header_iterations = strtod(optarg, NULL);
6874
6875                         if (header_iterations <= 0) {
6876                                 fprintf(outf, "iterations %d should be positive number\n", header_iterations);
6877                                 exit(2);
6878                         }
6879                         break;
6880                 case 's':
6881                         /*
6882                          * --show: show only those specified
6883                          *  The 1st invocation will clear and replace the enabled mask
6884                          *  subsequent invocations can add to it.
6885                          */
6886                         if (shown == 0)
6887                                 bic_enabled = bic_lookup(optarg, SHOW_LIST);
6888                         else
6889                                 bic_enabled |= bic_lookup(optarg, SHOW_LIST);
6890                         shown = 1;
6891                         break;
6892                 case 'S':
6893                         summary_only++;
6894                         break;
6895                 case 'T':
6896                         tj_max_override = atoi(optarg);
6897                         break;
6898                 case 'v':
6899                         print_version();
6900                         exit(0);
6901                         break;
6902                 }
6903         }
6904 }
6905
6906 int main(int argc, char **argv)
6907 {
6908         outf = stderr;
6909         cmdline(argc, argv);
6910
6911         if (!quiet) {
6912                 print_version();
6913                 print_bootcmd();
6914         }
6915
6916         probe_sysfs();
6917
6918         turbostat_init();
6919
6920         msr_sum_record();
6921
6922         /* dump counters and exit */
6923         if (dump_only)
6924                 return get_and_dump_counters();
6925
6926         /* list header and exit */
6927         if (list_header_only) {
6928                 print_header(",");
6929                 flush_output_stdout();
6930                 return 0;
6931         }
6932
6933         /*
6934          * if any params left, it must be a command to fork
6935          */
6936         if (argc - optind)
6937                 return fork_it(argv + optind);
6938         else
6939                 turbostat_loop();
6940
6941         return 0;
6942 }