tools/power turbostat: Add --no-perf option
[linux-block.git] / tools / power / x86 / turbostat / turbostat.c
index 7a334377f92b978fa642a0071b19f33d7e6fe74e..bad2fec7f3424a6b38ed84f1728a1178559fcec2 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/perf_event.h>
 #include <asm/unistd.h>
 #include <stdbool.h>
+#include <assert.h>
 
 #define UNUSED(x) (void)(x)
 
@@ -53,6 +54,8 @@
 #define        NAME_BYTES 20
 #define PATH_BYTES 128
 
+#define MAX_NOFILE 0x8000
+
 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
@@ -263,6 +266,8 @@ unsigned int has_hwp_epp;   /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;      /* IA32_HWP_REQUEST_PKG */
 unsigned int first_counter_read = 1;
 int ignore_stdin;
+bool no_msr;
+bool no_perf;
 
 int get_msr(int cpu, off_t offset, unsigned long long *msr);
 
@@ -989,8 +994,8 @@ struct pkg_data {
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
-       unsigned long long cpu_lpi;
-       unsigned long long sys_lpi;
+       long long cpu_lpi;
+       long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
@@ -1280,15 +1285,47 @@ int get_msr_fd(int cpu)
        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
        fd = open(pathname, O_RDONLY);
        if (fd < 0)
-               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+                   "or run with --no-msr, or run as root", pathname);
 
        fd_percpu[cpu] = fd;
 
        return fd;
 }
 
+/*
+ * Clear, in bic_enabled, the bit of every built-in column named in the
+ * list below.  main() calls this when no_msr is set (--no-msr).
+ */
+static void bic_disable_msr_access(void)
+{
+       /*
+        * NOTE(review): the BIC_* flags and bic_enabled are declared outside
+        * this hunk.  The mask is kept 64 bits wide so that ~bic_msrs cannot
+        * be zero-extended and wipe the high bits of a wider bic_enabled on
+        * targets where unsigned long is 32 bits -- confirm against the
+        * declarations.
+        */
+       const unsigned long long bic_msrs =
+           BIC_Avg_MHz |
+           BIC_Busy |
+           BIC_Bzy_MHz |
+           BIC_SMI |
+           BIC_CPU_c1 |
+           BIC_CPU_c3 |
+           BIC_CPU_c6 |
+           BIC_CPU_c7 |
+           BIC_Mod_c6 |
+           BIC_CoreTmp |
+           BIC_Totl_c0 |
+           BIC_Any_c0 |
+           BIC_GFX_c0 |
+           BIC_CPUGFX |
+           BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+       bic_enabled &= ~bic_msrs;
+}
+
+/*
+ * Clear the BIC_IPC bit in bic_enabled.  Called from main() when no_perf
+ * is set (--no-perf) and from perf_instr_count_open() when opening the
+ * perf counter fails.
+ */
+static void bic_disable_perf_access(void)
+{
+       /*
+        * NOTE(review): BIC_IPC and bic_enabled are declared outside this
+        * hunk.  The mask is kept 64 bits wide so that ~bic_perf cannot be
+        * zero-extended and wipe the high bits of a wider bic_enabled on
+        * targets where unsigned long is 32 bits -- confirm against the
+        * declarations.
+        */
+       const unsigned long long bic_perf = BIC_IPC;
+
+       bic_enabled &= ~bic_perf;
+}
+
 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
 {
+       assert(!no_perf);	/* no caller may reach the perf API once --no-perf has set no_perf */
+       /* Issue the perf_event_open system call through syscall() and hand back its result unchanged. */
        return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
 }
 
@@ -1305,8 +1342,8 @@ static int perf_instr_count_open(int cpu_num)
        /* counter for cpu_num, including user + kernel and all processes */
        fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
        if (fd == -1) {
-               warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
-               BIC_NOT_PRESENT(BIC_IPC);
+               warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\" or use --no-perf", progname);
+               bic_disable_perf_access();
        }
 
        return fd;
@@ -1326,6 +1363,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 {
        ssize_t retval;
 
+       assert(!no_msr);
+
        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 
        if (retval != sizeof *msr)
@@ -1369,6 +1408,8 @@ void help(void)
                "               Override default 5-second measurement interval\n"
                "  -J, --Joules displays energy in Joules instead of Watts\n"
                "  -l, --list   list column headers only\n"
+               "  -M, --no-msr Disable all uses of the MSR driver\n"
+               "  -P, --no-perf Disable all uses of the perf API\n"
                "  -n, --num_iterations num\n"
                "               number of the measurement iterations\n"
                "  -N, --header_iterations num\n"
@@ -1671,11 +1712,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                        outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 
                for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+                       outp +=
+                           sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   t->counter[i], mp->path);
                }
        }
 
-       if (c) {
+       if (c && is_cpu_first_thread_in_core(t, c, p)) {
                outp += sprintf(outp, "core: %d\n", c->core_id);
                outp += sprintf(outp, "c3: %016llX\n", c->c3);
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
@@ -1685,12 +1728,14 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
 
                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+                       outp +=
+                           sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   c->counter[i], mp->path);
                }
                outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
        }
 
-       if (p) {
+       if (p && is_cpu_first_core_in_package(t, c, p)) {
                outp += sprintf(outp, "package: %d\n", p->package_id);
 
                outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1719,7 +1764,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 
                for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+                       outp +=
+                           sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   p->counter[i], mp->path);
                }
        }
 
@@ -1976,12 +2023,22 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_Pkgpc10))
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
 
-       if (DO_BIC(BIC_CPU_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
-       if (DO_BIC(BIC_SYS_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+       if (DO_BIC(BIC_CPU_LPI)) {
+               if (p->cpu_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
+       if (DO_BIC(BIC_SYS_LPI)) {
+               if (p->sys_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->sys_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
 
        if (DO_BIC(BIC_PkgWatt))
                outp +=
@@ -2444,9 +2501,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-               if (mp->format == FORMAT_RAW)
-                       continue;
-               average.packages.counter[i] += p->counter[i];
+               if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+                       average.packages.counter[i] = p->counter[i];
+               else
+                       average.packages.counter[i] += p->counter[i];
        }
        return 0;
 }
@@ -2578,6 +2636,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 {
        if (mp->msr_num != 0) {
+               assert(!no_msr);
                if (get_msr(cpu, mp->msr_num, counterp))
                        return -1;
        } else {
@@ -2627,6 +2686,9 @@ int get_epb(int cpu)
        return epb;
 
 msr_fallback:
+       if (no_msr)
+               return -1;
+
        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
 
        return msr & 0xf;
@@ -2846,7 +2908,7 @@ retry:
        if (DO_BIC(BIC_CORE_THROT_CNT))
                get_core_throt_cnt(cpu, &c->core_throt_cnt);
 
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
+       if ((platform->rapl_msrs & RAPL_AMD_F17H) && !no_msr) {
                if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
                        return -14;
                c->core_energy = msr & 0xFFFFFFFF;
@@ -2911,41 +2973,44 @@ retry:
        if (DO_BIC(BIC_SYS_LPI))
                p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-       if (platform->rapl_msrs & RAPL_PKG) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
-                       return -13;
-               p->energy_pkg = msr;
-       }
-       if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
-               if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
-                       return -14;
-               p->energy_cores = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM) {
-               if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
-                       return -15;
-               p->energy_dram = msr;
-       }
-       if (platform->rapl_msrs & RAPL_GFX) {
-               if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
-                       return -16;
-               p->energy_gfx = msr;
-       }
-       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_pkg_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_dram_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
-                       return -13;
-               p->energy_pkg = msr;
+       if (!no_msr) {
+               if (platform->rapl_msrs & RAPL_PKG) {
+                       if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+                               return -13;
+                       p->energy_pkg = msr;
+               }
+               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
+                       if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+                               return -14;
+                       p->energy_cores = msr;
+               }
+               if (platform->rapl_msrs & RAPL_DRAM) {
+                       if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+                               return -15;
+                       p->energy_dram = msr;
+               }
+               if (platform->rapl_msrs & RAPL_GFX) {
+                       if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+                               return -16;
+                       p->energy_gfx = msr;
+               }
+               if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
+                       if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
+                               return -16;
+                       p->rapl_pkg_perf_status = msr;
+               }
+               if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
+                       if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
+                               return -16;
+                       p->rapl_dram_perf_status = msr;
+               }
+               if (platform->rapl_msrs & RAPL_AMD_F17H) {
+                       if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
+                               return -13;
+                       p->energy_pkg = msr;
+               }
        }
+
        if (DO_BIC(BIC_PkgTmp)) {
                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
                        return -17;
@@ -3053,7 +3118,7 @@ void probe_cst_limit(void)
        unsigned long long msr;
        int *pkg_cstate_limits;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        switch (platform->cst_limit) {
@@ -3097,7 +3162,7 @@ static void dump_platform_info(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3180,7 @@ static void dump_power_ctl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +3386,7 @@ static void dump_cst_cfg(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +3458,7 @@ void print_irtl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_irtl_msrs)
+       if (!platform->has_irtl_msrs || no_msr)
                return;
 
        if (platform->supported_cstates & PC3) {
@@ -3829,7 +3894,8 @@ void re_initialize(void)
 {
        free_all_buffers();
        setup_all_buffers(false);
-       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+               topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -4173,6 +4239,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
        int ret, idx;
        unsigned long long msr_cur, msr_last;
 
+       assert(!no_msr);
+
        if (!per_cpu_msr_sum)
                return 1;
 
@@ -4201,6 +4269,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
        UNUSED(c);
        UNUSED(p);
 
+       assert(!no_msr);
+
        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
                unsigned long long msr_cur, msr_last;
                off_t offset;
@@ -4445,7 +4515,7 @@ void check_permissions(void)
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (euidaccess(pathname, R_OK)) {
                do_exit++;
-               warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+               warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr, or run with --no-msr");
        }
 
        /* if all else fails, thell them to be root */
@@ -4462,7 +4532,7 @@ void probe_bclk(void)
        unsigned long long msr;
        unsigned int base_ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +4572,7 @@ static void dump_turbo_ratio_info(void)
        if (!has_turbo)
                return;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +4637,15 @@ static void dump_sysfs_file(char *path)
 static void probe_intel_uncore_frequency(void)
 {
        int i, j;
-       char path[128];
+       char path[256];
 
        if (!genuine_intel)
                return;
 
-       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
-               return;
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+               goto probe_cluster;
 
-       /* Cluster level sysfs not supported yet. */
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
-               return;
-
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
-               BIC_PRESENT(BIC_UNCORE_MHZ);
+       BIC_PRESENT(BIC_UNCORE_MHZ);
 
        if (quiet)
                return;
@@ -4588,26 +4653,73 @@ static void probe_intel_uncore_frequency(void)
        for (i = 0; i < topo.num_packages; ++i) {
                for (j = 0; j < topo.num_die; ++j) {
                        int k, l;
+                       char path_base[128];
+
+                       sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+                               j);
 
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+                       fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
 
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+                       fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+                       sprintf(path, "%s/current_freq_khz", path_base);
+                       k = read_sysfs_int(path);
+                       fprintf(outf, " %d MHz\n", k / 1000);
                }
        }
+       return;
+
+probe_cluster:
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+               return;
+
+       if (quiet)
+               return;
+
+       for (i = 0;; ++i) {
+               int k, l;
+               char path_base[128];
+               int package_id, domain_id, cluster_id;
+
+               sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+               if (access(path_base, R_OK))
+                       break;
+
+               sprintf(path, "%s/package_id", path_base);
+               package_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/domain_id", path_base);
+               domain_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/fabric_cluster_id", path_base);
+               cluster_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+                       cluster_id, k / 1000, l / 1000);
+
+               sprintf(path, "%s/initial_min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/initial_max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+               sprintf(path, "%s/current_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               fprintf(outf, " %d MHz\n", k / 1000);
+       }
 }
 
 static void probe_graphics(void)
@@ -4783,6 +4895,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!has_hwp)
                return 0;
 
@@ -4869,6 +4984,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        cpu = t->cpu_id;
 
        /* per-package */
@@ -5202,7 +5320,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  */
 void probe_rapl(void)
 {
-       if (!platform->rapl_msrs)
+       if (!platform->rapl_msrs || no_msr)
                return;
 
        if (genuine_intel)
@@ -5258,7 +5376,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        }
 
        /* Temperature Target MSR is Nehalem and newer only */
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                goto guess;
 
        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +5423,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!(do_dts || do_ptm))
                return 0;
 
@@ -5402,6 +5523,9 @@ void decode_feature_control_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
                fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
                        base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +5535,9 @@ void decode_misc_enable_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!genuine_intel)
                return;
 
@@ -5428,6 +5555,9 @@ void decode_misc_feature_control(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_feature_control)
                return;
 
@@ -5449,6 +5579,9 @@ void decode_misc_pwr_mgmt_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_pwr_mgmt)
                return;
 
@@ -5468,6 +5601,9 @@ void decode_c6_demotion_policy_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_c6_demotion_policy_config)
                return;
 
@@ -5489,7 +5625,8 @@ void print_dev_latency(void)
 
        fd = open(path, O_RDONLY);
        if (fd < 0) {
-               warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+               if (debug)
+                       warnx("Read %s failed", path);
                return;
        }
 
@@ -5563,7 +5700,7 @@ void probe_cstates(void)
        if (platform->has_msr_module_c6_res_ms)
                BIC_PRESENT(BIC_Mod_c6);
 
-       if (platform->has_ext_cst_msrs) {
+       if (platform->has_ext_cst_msrs && !no_msr) {
                BIC_PRESENT(BIC_Totl_c0);
                BIC_PRESENT(BIC_Any_c0);
                BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +5760,7 @@ void process_cpuid()
        unsigned int eax, ebx, ecx, edx;
        unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
        unsigned long long ucode_patch = 0;
+       bool ucode_patch_valid = false;
 
        eax = ebx = ecx = edx = 0;
 
@@ -5650,8 +5788,12 @@ void process_cpuid()
        ecx_flags = ecx;
        edx_flags = edx;
 
-       if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
-               warnx("get_msr(UCODE)");
+       if (!no_msr) {
+               if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+                       warnx("get_msr(UCODE)");
+               else
+                       ucode_patch_valid = true;
+       }
 
        /*
         * check max extended function levels of CPUID.
@@ -5662,9 +5804,12 @@ void process_cpuid()
        __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
 
        if (!quiet) {
-               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
-                       family, model, stepping, family, model, stepping,
-                       (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+                       family, model, stepping, family, model, stepping);
+               if (ucode_patch_valid)
+                       fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fputc('\n', outf);
+
                fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5786,6 +5931,15 @@ void process_cpuid()
                base_mhz = max_mhz = bus_mhz = edx = 0;
 
                __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+               bclk = bus_mhz;
+
+               base_hz = base_mhz * 1000000;
+               has_base_hz = 1;
+
+               if (platform->enable_tsc_tweak)
+                       tsc_tweak = base_hz / tsc_hz;
+
                if (!quiet)
                        fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
                                base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +5968,7 @@ void probe_pm_features(void)
 
        probe_thermal();
 
-       if (platform->has_nhm_msrs)
+       if (platform->has_nhm_msrs && !no_msr)
                BIC_PRESENT(BIC_SMI);
 
        if (!quiet)
@@ -6142,6 +6296,7 @@ void topology_update(void)
        topo.allowed_packages = 0;
        for_all_cpus(update_topo, ODD_COUNTERS);
 }
+
 void setup_all_buffers(bool startup)
 {
        topology_probe(startup);
@@ -6173,8 +6328,10 @@ void turbostat_init()
 {
        setup_all_buffers(true);
        set_base_cpu();
-       check_dev_msr();
-       check_permissions();
+       if (!no_msr) {
+               check_dev_msr();
+               check_permissions();
+       }
        process_cpuid();
        probe_pm_features();
        linux_perf_init();
@@ -6291,6 +6448,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
 {
        struct msr_counter *msrp;
 
+       if (no_msr && msr_num)
+               errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
        msrp = calloc(1, sizeof(struct msr_counter));
        if (msrp == NULL) {
                perror("calloc");
@@ -6595,6 +6755,8 @@ void cmdline(int argc, char **argv)
                { "list", no_argument, 0, 'l' },
                { "out", required_argument, 0, 'o' },
                { "quiet", no_argument, 0, 'q' },
+               { "no-msr", no_argument, 0, 'M' },
+               { "no-perf", no_argument, 0, 'P' },
                { "show", required_argument, 0, 's' },
                { "Summary", no_argument, 0, 'S' },
                { "TCC", required_argument, 0, 'T' },
@@ -6604,7 +6766,25 @@ void cmdline(int argc, char **argv)
 
        progname = argv[0];
 
-       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+       /*
+        * Parse some options early, because they may make other options invalid,
+        * like adding the MSR counter with --add and at the same time using --no-msr.
+        */
+       while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+               switch (opt) {
+               case 'M':
+                       no_msr = 1;
+                       break;
+               case 'P':
+                       no_perf = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+       optind = 0;
+
+       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
                switch (opt) {
                case 'a':
                        parse_add_command(optarg);
@@ -6662,6 +6842,10 @@ void cmdline(int argc, char **argv)
                case 'q':
                        quiet = 1;
                        break;
+               case 'M':
+               case 'P':
+                       /* Parsed earlier */
+                       break;
                case 'n':
                        num_iterations = strtod(optarg, NULL);
 
@@ -6704,6 +6888,22 @@ void cmdline(int argc, char **argv)
        }
 }
 
+/*
+ * Make sure both the soft and the hard RLIMIT_NOFILE limit are at least
+ * MAX_NOFILE.  Limits that are already that high are written back
+ * unchanged.  Exits through err() when the limit cannot be read or set.
+ */
+void set_rlimit(void)
+{
+       struct rlimit nofile;
+
+       if (getrlimit(RLIMIT_NOFILE, &nofile) < 0)
+               err(1, "Failed to get rlimit");
+
+       /* Only ever grow a limit, never shrink it. */
+       nofile.rlim_max = (nofile.rlim_max < MAX_NOFILE) ? MAX_NOFILE : nofile.rlim_max;
+       nofile.rlim_cur = (nofile.rlim_cur < MAX_NOFILE) ? MAX_NOFILE : nofile.rlim_cur;
+
+       if (setrlimit(RLIMIT_NOFILE, &nofile) < 0)
+               err(1, "Failed to set rlimit");
+}
+
 int main(int argc, char **argv)
 {
        int fd, ret;
@@ -6722,6 +6922,12 @@ skip_cgroup_setting:
        outf = stderr;
        cmdline(argc, argv);
 
+       if (no_msr)
+               bic_disable_msr_access();
+
+       if (no_perf)
+               bic_disable_perf_access();
+
        if (!quiet) {
                print_version();
                print_bootcmd();
@@ -6729,9 +6935,13 @@ skip_cgroup_setting:
 
        probe_sysfs();
 
+       if (!getuid())
+               set_rlimit();
+
        turbostat_init();
 
-       msr_sum_record();
+       if (!no_msr)
+               msr_sum_record();
 
        /* dump counters and exit */
        if (dump_only)