Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 25 Dec 2016 22:30:04 +0000 (14:30 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 25 Dec 2016 22:30:04 +0000 (14:30 -0800)
Pull timer type cleanups from Thomas Gleixner:
 "This series does a tree wide cleanup of types related to
  timers/timekeeping.

   - Get rid of cycles_t and use a plain u64. The type is not really
     helpful and caused more confusion than clarity

   - Get rid of the ktime union. The union has become useless as we use
     the scalar nanoseconds storage unconditionally now. The 32bit
     timespec alike storage got removed due to the Y2038 limitations
     some time ago.

     That leaves the odd union access around for no reason. Clean it up.

  Both changes have been done with coccinelle and a small amount of
  manual mopping up"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ktime: Get rid of ktime_equal()
  ktime: Cleanup ktime_set() usage
  ktime: Get rid of the union
  clocksource: Use a plain u64 instead of cycle_t

86 files changed:
arch/arm/kernel/smp_twd.c
arch/arm/mach-imx/mmdc.c
arch/arm/mach-mvebu/coherency.c
arch/arm/mm/cache-l2x0-pmu.c
arch/arm/mm/cache-l2x0.c
arch/arm/vfp/vfpmodule.c
arch/arm/xen/enlighten.c
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/debug-monitors.c
arch/arm64/kernel/hw_breakpoint.c
arch/blackfin/kernel/perf_event.c
arch/metag/kernel/perf/perf_event.c
arch/mips/kernel/pm-cps.c
arch/mips/oprofile/op_model_loongson3.c
arch/powerpc/mm/numa.c
arch/powerpc/perf/core-book3s.c
arch/s390/kernel/perf_cpum_cf.c
arch/s390/kernel/perf_cpum_sf.c
arch/x86/entry/vdso/vma.c
arch/x86/events/amd/ibs.c
arch/x86/events/amd/power.c
arch/x86/events/amd/uncore.c
arch/x86/events/core.c
arch/x86/events/intel/cqm.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/kernel/apb_timer.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/hpet.c
arch/x86/kernel/msr.c
arch/x86/kernel/tboot.c
arch/x86/kvm/x86.c
arch/x86/xen/enlighten.c
arch/xtensa/kernel/perf_event.c
drivers/bus/arm-cci.c
drivers/bus/arm-ccn.c
drivers/clocksource/arc_timer.c
drivers/clocksource/arm_arch_timer.c
drivers/clocksource/arm_global_timer.c
drivers/clocksource/dummy_timer.c
drivers/clocksource/exynos_mct.c
drivers/clocksource/jcore-pit.c
drivers/clocksource/metag_generic.c
drivers/clocksource/mips-gic-timer.c
drivers/clocksource/qcom-timer.c
drivers/clocksource/time-armada-370-xp.c
drivers/clocksource/timer-atlas7.c
drivers/hwtracing/coresight/coresight-etm3x.c
drivers/hwtracing/coresight/coresight-etm4x.c
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-bcm2836.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-hip04.c
drivers/leds/trigger/ledtrig-cpu.c
drivers/net/virtio_net.c
drivers/perf/arm_pmu.c
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
drivers/scsi/bnx2i/bnx2i_init.c
drivers/scsi/qedi/qedi_main.c
drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
drivers/xen/events/events_fifo.c
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/mm.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/writeback.h
include/trace/events/mmflags.h
init/main.c
kernel/cpu.c
lib/Kconfig.debug
lib/Makefile
lib/cpu-notifier-error-inject.c [deleted file]
mm/filemap.c
mm/internal.h
mm/memory-failure.c
mm/migrate.c
mm/swap.c
tools/power/x86/turbostat/Makefile
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/kvm_main.c

index 02d5e5e8d44cd0e55e216b4ab4469f5b386741f8..895ae5197159e36c34a61cbb6c404eced5698264 100644 (file)
@@ -339,7 +339,7 @@ static int __init twd_local_timer_common_register(struct device_node *np)
        }
 
        cpuhp_setup_state_nocalls(CPUHP_AP_ARM_TWD_STARTING,
-                                 "AP_ARM_TWD_STARTING",
+                                 "arm/timer/twd:starting",
                                  twd_timer_starting_cpu, twd_timer_dying_cpu);
 
        twd_get_clock(np);
index ba96bf979625d4da755fa08939eefe626cb79bdd..699157759120f13ed4047b05609e53cb419fffe5 100644 (file)
@@ -60,6 +60,7 @@
 
 #define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu)
 
+static enum cpuhp_state cpuhp_mmdc_state;
 static int ddr_type;
 
 struct fsl_mmdc_devtype_data {
@@ -451,8 +452,8 @@ static int imx_mmdc_remove(struct platform_device *pdev)
 {
        struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
 
+       cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
        perf_pmu_unregister(&pmu_mmdc->pmu);
-       cpuhp_remove_state_nocalls(CPUHP_ONLINE);
        kfree(pmu_mmdc);
        return 0;
 }
@@ -472,6 +473,18 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
                return -ENOMEM;
        }
 
+       /* The first instance registers the hotplug state */
+       if (!cpuhp_mmdc_state) {
+               ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+                                             "perf/arm/mmdc:online", NULL,
+                                             mmdc_pmu_offline_cpu);
+               if (ret < 0) {
+                       pr_err("cpuhp_setup_state_multi failed\n");
+                       goto pmu_free;
+               }
+               cpuhp_mmdc_state = ret;
+       }
+
        mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
        if (mmdc_num == 0)
                name = "mmdc";
@@ -485,26 +498,23 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b
                        HRTIMER_MODE_REL);
        pmu_mmdc->hrtimer.function = mmdc_pmu_timer_handler;
 
-       cpuhp_state_add_instance_nocalls(CPUHP_ONLINE,
-                                        &pmu_mmdc->node);
-       cpumask_set_cpu(smp_processor_id(), &pmu_mmdc->cpu);
-       ret = cpuhp_setup_state_multi(CPUHP_AP_NOTIFY_ONLINE,
-                                     "MMDC_ONLINE", NULL,
-                                     mmdc_pmu_offline_cpu);
-       if (ret) {
-               pr_err("cpuhp_setup_state_multi failure\n");
-               goto pmu_register_err;
-       }
+       cpumask_set_cpu(raw_smp_processor_id(), &pmu_mmdc->cpu);
+
+       /* Register the pmu instance for cpu hotplug */
+       cpuhp_state_add_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
 
        ret = perf_pmu_register(&(pmu_mmdc->pmu), name, -1);
-       platform_set_drvdata(pdev, pmu_mmdc);
        if (ret)
                goto pmu_register_err;
+
+       platform_set_drvdata(pdev, pmu_mmdc);
        return 0;
 
 pmu_register_err:
        pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
+       cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node);
        hrtimer_cancel(&pmu_mmdc->hrtimer);
+pmu_free:
        kfree(pmu_mmdc);
        return ret;
 }
index ae2a018b93050fa8171d2164124d85796e801bb6..8f8748a0c84f44229fec0eb4f3fa225202c2666a 100644 (file)
@@ -148,7 +148,7 @@ static void __init armada_370_coherency_init(struct device_node *np)
        of_node_put(cpu_config_np);
 
        cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
-                                 "AP_ARM_MVEBU_COHERENCY",
+                                 "arm/mvebu/coherency:starting",
                                  armada_xp_clear_l2_starting, NULL);
 exit:
        set_cpu_coherent();
index 976d3057272e0e128bdb2af30d8c2f220d4a5aa7..0a1e2280141f796e1fa62ed25352aa77606b2b95 100644 (file)
@@ -563,7 +563,7 @@ static __init int l2x0_pmu_init(void)
 
        cpumask_set_cpu(0, &pmu_cpu);
        ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
-                                       "AP_PERF_ARM_L2X0_ONLINE", NULL,
+                                       "perf/arm/l2x0:online", NULL,
                                        l2x0_pmu_offline_cpu);
        if (ret)
                goto out_pmu;
index d1870c777c6e2decbac8a2cd58c73c10c9e30ede..2290be390f87b15c48cd9cfc67a08386e1148999 100644 (file)
@@ -683,7 +683,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 
        if (aux & L310_AUX_CTRL_FULL_LINE_ZERO)
                cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING,
-                                 "AP_ARM_L2X0_STARTING", l2c310_starting_cpu,
+                                 "arm/l2x0:starting", l2c310_starting_cpu,
                                  l2c310_dying_cpu);
 }
 
index 0351f5645fb11c7f086e4ad6ea6ab4e9bdd3cfae..569d5a650a4a2c6266ddf8fc6d38e0cd96b985f6 100644 (file)
@@ -799,7 +799,7 @@ static int __init vfp_init(void)
        }
 
        cpuhp_setup_state_nocalls(CPUHP_AP_ARM_VFP_STARTING,
-                                 "AP_ARM_VFP_STARTING", vfp_starting_cpu,
+                                 "arm/vfp:starting", vfp_starting_cpu,
                                  vfp_dying_cpu);
 
        vfp_vector = vfp_support_entry;
index 4986dc0c1dff05f564756d584cde2c06c4ee01ce..11d9f2898b16441b4ccac1dc111fe6c6c5a9ed7d 100644 (file)
@@ -412,7 +412,7 @@ static int __init xen_guest_init(void)
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 
        return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
-                                "AP_ARM_XEN_STARTING", xen_starting_cpu,
+                                "arm/xen:starting", xen_starting_cpu,
                                 xen_dying_cpu);
 }
 early_initcall(xen_guest_init);
index fde04f029ec34171fe0a0683f10ebb84a0ee8582..ecf9298a12d48f11d5833dca0584d8ab028cf422 100644 (file)
@@ -640,7 +640,7 @@ static int __init armv8_deprecated_init(void)
        }
 
        cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
-                                 "AP_ARM64_ISNDEP_STARTING",
+                                 "arm64/isndep:starting",
                                  run_all_insn_set_hw_mode, NULL);
        register_insn_emulation_sysctl(ctl_abi);
 
index 605df76f0a06c161b4adf64bb15e90d88328f82e..2bd426448fc190ce52a08208e9ff07fe7fa40a23 100644 (file)
@@ -140,7 +140,7 @@ static int clear_os_lock(unsigned int cpu)
 static int debug_monitors_init(void)
 {
        return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
-                                "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING",
+                                "arm64/debug_monitors:starting",
                                 clear_os_lock, NULL);
 }
 postcore_initcall(debug_monitors_init);
index 1b3c747fedda51060d30fbd8a337870ca0291d6c..0296e79242402008837da11d97cfe682d711c18e 100644 (file)
@@ -1001,7 +1001,7 @@ static int __init arch_hw_breakpoint_init(void)
         * debugger will leave the world in a nice state for us.
         */
        ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
-                         "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING",
+                         "perf/arm64/hw_breakpoint:starting",
                          hw_breakpoint_reset, NULL);
        if (ret)
                pr_err("failed to register CPU hotplug notifier: %d\n", ret);
index 6355e97d22b9b29fc7a875183c44812a90cd9ca5..6a9524ad04a5dcec696025a36aa17bea47950811 100644 (file)
@@ -475,7 +475,7 @@ static int __init bfin_pmu_init(void)
 
        ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        if (!ret)
-               cpuhp_setup_state(CPUHP_PERF_BFIN, "PERF_BFIN",
+               cpuhp_setup_state(CPUHP_PERF_BFIN, "perf/bfin:starting",
                                  bfin_pmu_prepare_cpu, NULL);
        return ret;
 }
index 052cba23708c57bff289c74b386440268d7c2ba1..7e793eb0c1fe1b7053d709e04612a51f9b8456af 100644 (file)
@@ -868,7 +868,7 @@ static int __init init_hw_perf_events(void)
        metag_out32(0, PERF_COUNT(1));
 
        cpuhp_setup_state(CPUHP_AP_PERF_METAG_STARTING,
-                         "AP_PERF_METAG_STARTING", metag_pmu_starting_cpu,
+                         "perf/metag:starting", metag_pmu_starting_cpu,
                          NULL);
 
        ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
index 7cf653e214237f75b22200c94f0e47c696be14ae..5f928c34c1489c1b054b105a37296f72d863d6f7 100644 (file)
@@ -713,7 +713,7 @@ static int __init cps_pm_init(void)
                pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
        }
 
-       return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PM_CPS_CPU_ONLINE",
+       return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online",
                                 cps_pm_online_cpu, NULL);
 }
 arch_initcall(cps_pm_init);
index 40660392006f2bf2ec4f59f7aca3ad3ff1a426d7..436b1fc99f2ca82a9f994cd465f1e3a742659fd1 100644 (file)
@@ -186,7 +186,7 @@ static int __init loongson3_init(void)
 {
        on_each_cpu(reset_counters, NULL, 1);
        cpuhp_setup_state_nocalls(CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
-                                 "AP_MIPS_OP_LOONGSON3_STARTING",
+                                 "mips/oprofile/loongson3:starting",
                                  loongson3_starting_cpu, loongson3_dying_cpu);
        save_perf_irq = perf_irq;
        perf_irq = loongson3_perfcount_handler;
index 0cb6bd8bfccfd7139b5148a60bd486a1fe8bfcdd..b1099cb2f393c8adbfa7f1ac42f3db478e177efc 100644 (file)
@@ -944,7 +944,7 @@ void __init initmem_init(void)
         * _nocalls() + manual invocation is used because cpuhp is not yet
         * initialized for the boot CPU.
         */
-       cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE",
+       cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
                                  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
        for_each_present_cpu(cpu)
                numa_setup_cpu(cpu);
index 72c27b8d2cf3240f201eabeb723afcdb0ff9b0de..fd3e4034c04d2207a30cc82d6c65dffc6094c603 100644 (file)
@@ -2189,7 +2189,7 @@ int register_power_pmu(struct power_pmu *pmu)
 #endif /* CONFIG_PPC64 */
 
        perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
-       cpuhp_setup_state(CPUHP_PERF_POWER, "PERF_POWER",
+       cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
                          power_pmu_prepare_cpu, NULL);
        return 0;
 }
index 037c2a253ae48fc977cdb520e21c86c745507624..1aba10e9090632ad7e644f2f066c4c405f1ce530 100644 (file)
@@ -711,7 +711,7 @@ static int __init cpumf_pmu_init(void)
                return rc;
        }
        return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
-                                "AP_PERF_S390_CF_ONLINE",
+                                "perf/s390/cf:online",
                                 s390_pmu_online_cpu, s390_pmu_offline_cpu);
 }
 early_initcall(cpumf_pmu_init);
index 763dec18edcdd9ea763dfa18beeeddbc6ca37c9d..1c0b58545c04d6f5c44e0f07838fcc35901e0cf5 100644 (file)
@@ -1623,7 +1623,7 @@ static int __init init_cpum_sampling_pmu(void)
                goto out;
        }
 
-       cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE",
+       cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
                          s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
 out:
        return err;
index 40121d14d34d140b5dcd8b14ed0143d9f6b70a02..10820f6cefbf020737d1729fb92a7217fe0827fe 100644 (file)
@@ -371,7 +371,7 @@ static int __init init_vdso(void)
 
        /* notifier priority > KVM */
        return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
-                                "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
+                                "x86/vdso/vma:online", vgetcpu_online, NULL);
 }
 subsys_initcall(init_vdso);
 #endif /* CONFIG_X86_64 */
index b26ee32f73e8af227901b2b7f6d0a5ad620f65a3..05612a2529c8bba1e9aa9131a4cabaebe96d5736 100644 (file)
@@ -1010,7 +1010,7 @@ static __init int amd_ibs_init(void)
         * all online cpus.
         */
        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
-                         "AP_PERF_X86_AMD_IBS_STARTING",
+                         "perf/x86/amd/ibs:starting",
                          x86_pmu_amd_ibs_starting_cpu,
                          x86_pmu_amd_ibs_dying_cpu);
 
index 9842270ed2f20b27f587ef36fefb86a6ff17a951..a6eee5ac4f581d927e9862cfbde643971fc76cf8 100644 (file)
@@ -291,7 +291,7 @@ static int __init amd_power_pmu_init(void)
 
 
        cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
-                         "AP_PERF_X86_AMD_POWER_ONLINE",
+                         "perf/x86/amd/power:online",
                          power_cpu_init, power_cpu_exit);
 
        ret = perf_pmu_register(&pmu_class, "power", -1);
index 65577f081d072c5f774f763adc19a0e7a87e5627..a0b1bdb3ad421ed93746718717d47eb84904e86f 100644 (file)
@@ -527,16 +527,16 @@ static int __init amd_uncore_init(void)
         * Install callbacks. Core will call them for each online cpu.
         */
        if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
-                             "PERF_X86_AMD_UNCORE_PREP",
+                             "perf/x86/amd/uncore:prepare",
                              amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
                goto fail_l2;
 
        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
-                             "AP_PERF_X86_AMD_UNCORE_STARTING",
+                             "perf/x86/amd/uncore:starting",
                              amd_uncore_cpu_starting, NULL))
                goto fail_prep;
        if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
-                             "AP_PERF_X86_AMD_UNCORE_ONLINE",
+                             "perf/x86/amd/uncore:online",
                              amd_uncore_cpu_online,
                              amd_uncore_cpu_down_prepare))
                goto fail_start;
index f1c22584a46f50342d56a642ca83db1504983b0e..019c5887b698af2a5fbf322c7cc5d6d3f8ab7e57 100644 (file)
@@ -1820,18 +1820,18 @@ static int __init init_hw_perf_events(void)
         * Install callbacks. Core will call them for each online
         * cpu.
         */
-       err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+       err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "perf/x86:prepare",
                                x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
        if (err)
                return err;
 
        err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
-                               "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+                               "perf/x86:starting", x86_pmu_starting_cpu,
                                x86_pmu_dying_cpu);
        if (err)
                goto out;
 
-       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "perf/x86:online",
                                x86_pmu_online_cpu, NULL);
        if (err)
                goto out1;
index 0c45cc8e64ba77f6988cff5a0e5443dfe0449435..8c00dc09a5d2cf41e0e0af5f6eb76577f4f0df09 100644 (file)
@@ -1747,9 +1747,9 @@ static int __init intel_cqm_init(void)
         * is enabled to avoid notifier leak.
         */
        cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
-                         "AP_PERF_X86_CQM_STARTING",
+                         "perf/x86/cqm:starting",
                          intel_cqm_cpu_starting, NULL);
-       cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE",
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "perf/x86/cqm:online",
                          NULL, intel_cqm_cpu_exit);
 
 out:
index da51e5a3e2ff799d54257aa3db5151b4723c8292..fec8a461bdef6da49c0e7f46c655f235e7fc1275 100644 (file)
@@ -594,6 +594,9 @@ static int __init cstate_probe(const struct cstate_model *cm)
 
 static inline void cstate_cleanup(void)
 {
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
+
        if (has_cstate_core)
                perf_pmu_unregister(&cstate_core_pmu);
 
@@ -606,16 +609,16 @@ static int __init cstate_init(void)
        int err;
 
        cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
-                         "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
-                         NULL);
+                         "perf/x86/cstate:starting", cstate_cpu_init, NULL);
        cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
-                         "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
+                         "perf/x86/cstate:online", NULL, cstate_cpu_exit);
 
        if (has_cstate_core) {
                err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
                if (err) {
                        has_cstate_core = false;
                        pr_info("Failed to register cstate core pmu\n");
+                       cstate_cleanup();
                        return err;
                }
        }
@@ -629,8 +632,7 @@ static int __init cstate_init(void)
                        return err;
                }
        }
-
-       return err;
+       return 0;
 }
 
 static int __init cstate_pmu_init(void)
@@ -655,8 +657,6 @@ module_init(cstate_pmu_init);
 
 static void __exit cstate_pmu_exit(void)
 {
-       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
-       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
        cstate_cleanup();
 }
 module_exit(cstate_pmu_exit);
index 0a535cea8ff31adf6e06fc32ff763d423a73ab74..bd34124449b08a4be95f114b3295c22e4d57cd34 100644 (file)
@@ -803,13 +803,13 @@ static int __init rapl_pmu_init(void)
         * Install callbacks. Core will call them for each online cpu.
         */
 
-       ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+       ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare",
                                rapl_cpu_prepare, NULL);
        if (ret)
                goto out;
 
        ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
-                               "AP_PERF_X86_RAPL_ONLINE",
+                               "perf/x86/rapl:online",
                                rapl_cpu_online, rapl_cpu_offline);
        if (ret)
                goto out1;
index dbaaf7dc8373cb0248a4637abe4a211f266fb2db..97c246f84dea1e79d0f4517376763dcb88a18aea 100644 (file)
@@ -1398,22 +1398,22 @@ static int __init intel_uncore_init(void)
         */
        if (!cret) {
               ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
-                                       "PERF_X86_UNCORE_PREP",
-                                       uncore_cpu_prepare, NULL);
+                                      "perf/x86/intel/uncore:prepare",
+                                      uncore_cpu_prepare, NULL);
                if (ret)
                        goto err;
        } else {
                cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
-                                         "PERF_X86_UNCORE_PREP",
+                                         "perf/x86/intel/uncore:prepare",
                                          uncore_cpu_prepare, NULL);
        }
        first_init = 1;
        cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
-                         "AP_PERF_X86_UNCORE_STARTING",
+                         "perf/x86/uncore:starting",
                          uncore_cpu_starting, uncore_cpu_dying);
        first_init = 0;
        cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-                         "AP_PERF_X86_UNCORE_ONLINE",
+                         "perf/x86/uncore:online",
                          uncore_event_cpu_online, uncore_event_cpu_offline);
        return 0;
 
index 092ea664d2c6b783e176e40b98a4b2515875f6bd..65721dc73bd83b982154c5d5e0f52e06b0ccad29 100644 (file)
@@ -234,7 +234,7 @@ static __init int apbt_late_init(void)
        if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
                !apb_timer_block_enabled)
                return 0;
-       return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "X86_APB_DEAD", NULL,
+       return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "x86/apb:dead", NULL,
                                 apbt_cpu_dead);
 }
 fs_initcall(apbt_late_init);
index 200af5ae96626e1610c7c946bb0a33c5360f9bcb..5a35f208ed95909d339db672cb493445c24a1a81 100644 (file)
@@ -191,7 +191,7 @@ static int x2apic_cluster_probe(void)
        if (!x2apic_mode)
                return 0;
 
-       ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+       ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
                                x2apic_prepare_cpu, x2apic_dead_cpu);
        if (ret < 0) {
                pr_err("Failed to register X2APIC_PREPARE\n");
index 367756d55980fd02b94fe72a7c65f628421e089c..85e87b46c318026ed28d87056c516aec3e5fb9ed 100644 (file)
@@ -1051,11 +1051,11 @@ static __init int hpet_late_init(void)
                return 0;
 
        /* This notifier should be called after workqueue is ready */
-       ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "AP_X86_HPET_ONLINE",
+       ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
                                hpet_cpuhp_online, NULL);
        if (ret)
                return ret;
-       ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "X86_HPET_DEAD", NULL,
+       ret = cpuhp_setup_state(CPUHP_X86_HPET_DEAD, "x86/hpet:dead", NULL,
                                hpet_cpuhp_dead);
        if (ret)
                goto err_cpuhp;
index f5e3ff835cc8a318c12c3decc8532cc9df411b69..ef688804f80d33088fef15448996a97f69e2b193 100644 (file)
@@ -224,7 +224,6 @@ static int __init msr_init(void)
        return 0;
 
 out_class:
-       cpuhp_remove_state(cpuhp_msr_state);
        class_destroy(msr_class);
 out_chrdev:
        __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
index 8402907825b02706f87283c45b5e27086a9cce0b..b868fa1b812b3a82713e0556c39900bced73f338 100644 (file)
@@ -408,7 +408,7 @@ static __init int tboot_late_init(void)
        tboot_create_trampoline();
 
        atomic_set(&ap_wfs_count, 0);
-       cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "AP_X86_TBOOT_DYING", NULL,
+       cpuhp_setup_state(CPUHP_AP_X86_TBOOT_DYING, "x86/tboot:dying", NULL,
                          tboot_dying_cpu);
 #ifdef CONFIG_DEBUG_FS
        debugfs_create_file("tboot_log", S_IRUSR,
index ed04398f52c1bf30ed9a1ec0d62c5a4ee91adbd4..51ccfe08e32ff0570517fda01277dc0085721e94 100644 (file)
@@ -5855,7 +5855,7 @@ static void kvm_timer_init(void)
        }
        pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
 
-       cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+       cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
                          kvmclock_cpu_online, kvmclock_cpu_down_prep);
 }
 
index ced7027b3fbc299a32daa666fa77c6afa2739599..51ef952327257cb2a787b6743805e4cb677ecafc 100644 (file)
@@ -1529,11 +1529,11 @@ static int xen_cpuhp_setup(void)
        int rc;
 
        rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
-                                      "XEN_HVM_GUEST_PREPARE",
+                                      "x86/xen/hvm_guest:prepare",
                                       xen_cpu_up_prepare, xen_cpu_dead);
        if (rc >= 0) {
                rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-                                              "XEN_HVM_GUEST_ONLINE",
+                                              "x86/xen/hvm_guest:online",
                                               xen_cpu_up_online, NULL);
                if (rc < 0)
                        cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
index 0fecc8a2c0b58987f9a2af56bc40808cbef4dfa3..ff1d81385ed7a2658e3733ea864f0f4434ec0e65 100644 (file)
@@ -422,7 +422,7 @@ static int __init xtensa_pmu_init(void)
        int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
 
        ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
-                               "AP_PERF_XTENSA_STARTING", xtensa_pmu_setup,
+                               "perf/xtensa:starting", xtensa_pmu_setup,
                                NULL);
        if (ret) {
                pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
index 231633328dfa9641ad2f6644321aab6690dc7939..c49da15d979013bb5b890e5cdd4db9296e1508fa 100644 (file)
@@ -1796,7 +1796,7 @@ static int __init cci_platform_init(void)
        int ret;
 
        ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
-                                     "AP_PERF_ARM_CCI_ONLINE", NULL,
+                                     "perf/arm/cci:online", NULL,
                                      cci_pmu_offline_cpu);
        if (ret)
                return ret;
index d1074d9b38ba163e624ea1b996df38ad2164f285..4d6a2b7e4d3fc1fead60ccfc60d7f048030d3de1 100644 (file)
@@ -1562,7 +1562,7 @@ static int __init arm_ccn_init(void)
        int i, ret;
 
        ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE,
-                                     "AP_PERF_ARM_CCN_ONLINE", NULL,
+                                     "perf/arm/ccn:online", NULL,
                                      arm_ccn_pmu_offline_cpu);
        if (ret)
                return ret;
@@ -1570,7 +1570,10 @@ static int __init arm_ccn_init(void)
        for (i = 0; i < ARRAY_SIZE(arm_ccn_pmu_events); i++)
                arm_ccn_pmu_events_attrs[i] = &arm_ccn_pmu_events[i].attr.attr;
 
-       return platform_driver_register(&arm_ccn_driver);
+       ret = platform_driver_register(&arm_ccn_driver);
+       if (ret)
+               cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+       return ret;
 }
 
 static void __exit arm_ccn_exit(void)
index 3ea46343024f8fea4b64728c6985210bd8331e20..7517f959cba762e484cdc1fb00a78cb95aee5fe0 100644 (file)
@@ -309,7 +309,7 @@ static int __init arc_clockevent_setup(struct device_node *node)
        }
 
        ret = cpuhp_setup_state(CPUHP_AP_ARC_TIMER_STARTING,
-                               "AP_ARC_TIMER_STARTING",
+                               "clockevents/arc/timer:starting",
                                arc_timer_starting_cpu,
                                arc_timer_dying_cpu);
        if (ret) {
index 394e417414d3f297c9f56497f1a00b31afe55d21..4c8c3fb2e8b248b3335d1c4f582f7faab1121d37 100644 (file)
@@ -738,7 +738,7 @@ static int __init arch_timer_register(void)
 
        /* Register and immediately configure the timer on the boot CPU */
        err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
-                               "AP_ARM_ARCH_TIMER_STARTING",
+                               "clockevents/arm/arch_timer:starting",
                                arch_timer_starting_cpu, arch_timer_dying_cpu);
        if (err)
                goto out_unreg_cpupm;
index 570cc58baec441f018ee8ffe704c4b53ce12a008..123ed20ac2ffd406a4e9225ae0a23909066a2f1b 100644 (file)
@@ -316,7 +316,7 @@ static int __init global_timer_of_register(struct device_node *np)
                goto out_irq;
        
        err = cpuhp_setup_state(CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
-                               "AP_ARM_GLOBAL_TIMER_STARTING",
+                               "clockevents/arm/global_timer:starting",
                                gt_starting_cpu, gt_dying_cpu);
        if (err)
                goto out_irq;
index 89f1c2edbe02ee96eaf822e4bafe8139fa556062..01f3f5a59bc60306afee266ff536c85c50c45b1a 100644 (file)
@@ -34,7 +34,7 @@ static int dummy_timer_starting_cpu(unsigned int cpu)
 static int __init dummy_timer_register(void)
 {
        return cpuhp_setup_state(CPUHP_AP_DUMMY_TIMER_STARTING,
-                                "AP_DUMMY_TIMER_STARTING",
+                                "clockevents/dummy_timer:starting",
                                 dummy_timer_starting_cpu, NULL);
 }
 early_initcall(dummy_timer_register);
index c8b9f834f4de1ea00443fc5de7de72dc6bb974fe..4da1dc2278bd7fc34caa9e00d29f71ec1ebd015f 100644 (file)
@@ -552,7 +552,7 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem *
 
        /* Install hotplug callbacks which configure the timer on this CPU */
        err = cpuhp_setup_state(CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
-                               "AP_EXYNOS4_MCT_TIMER_STARTING",
+                               "clockevents/exynos4/mct_timer:starting",
                                exynos4_mct_starting_cpu,
                                exynos4_mct_dying_cpu);
        if (err)
index e90a6cfcb061f388618bbb09f724ae32163679f7..7c61226f435918ca3a66d64a4c9f195ae8129994 100644 (file)
@@ -240,7 +240,7 @@ static int __init jcore_pit_init(struct device_node *node)
        }
 
        cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING,
-                         "AP_JCORE_TIMER_STARTING",
+                         "clockevents/jcore:starting",
                          jcore_pit_local_init, NULL);
 
        return 0;
index 8d06a0f7ff268dc236a5d22500ee2c851bb9ab8a..6fcf96540631d2d9a53a1f64978e99d4cc2f26ab 100644 (file)
@@ -154,6 +154,6 @@ int __init metag_generic_timer_init(void)
 
        /* Hook cpu boot to configure the CPU's timers */
        return cpuhp_setup_state(CPUHP_AP_METAG_TIMER_STARTING,
-                                "AP_METAG_TIMER_STARTING",
+                                "clockevents/metag:starting",
                                 arch_timer_starting_cpu, NULL);
 }
index 7b86d07c99b4ce0327e0f517be7b328a248880a3..d9ef7a61e093964485e2f078c733a299b33cc409 100644 (file)
@@ -120,8 +120,8 @@ static int gic_clockevent_init(void)
        }
 
        cpuhp_setup_state(CPUHP_AP_MIPS_GIC_TIMER_STARTING,
-                         "AP_MIPS_GIC_TIMER_STARTING", gic_starting_cpu,
-                         gic_dying_cpu);
+                         "clockevents/mips/gic/timer:starting",
+                         gic_starting_cpu, gic_dying_cpu);
        return 0;
 }
 
index d5d048d890d4e662389a82d7db2ac254962daf17..ee358cdf4a07b37e1952728fc22e21e2834d4864 100644 (file)
@@ -182,7 +182,7 @@ static int __init msm_timer_init(u32 dgt_hz, int sched_bits, int irq,
        } else {
                /* Install and invoke hotplug callbacks */
                res = cpuhp_setup_state(CPUHP_AP_QCOM_TIMER_STARTING,
-                                       "AP_QCOM_TIMER_STARTING",
+                                       "clockevents/qcom/timer:starting",
                                        msm_local_timer_starting_cpu,
                                        msm_local_timer_dying_cpu);
                if (res) {
index 3c39e6f459714e5b725cfdd10663fb04dd62063b..4440aefc59cdefd9c5b859952a59724d3c37cbf6 100644 (file)
@@ -320,7 +320,7 @@ static int __init armada_370_xp_timer_common_init(struct device_node *np)
        }
 
        res = cpuhp_setup_state(CPUHP_AP_ARMADA_TIMER_STARTING,
-                               "AP_ARMADA_TIMER_STARTING",
+                               "clockevents/armada:starting",
                                armada_370_xp_timer_starting_cpu,
                                armada_370_xp_timer_dying_cpu);
        if (res) {
index db0f21e7d7d25ee02a15b0acb74786c021637149..3d8a181f02528346c5d99053cdd87ad6b6569e34 100644 (file)
@@ -221,7 +221,7 @@ static int __init sirfsoc_clockevent_init(void)
 
        /* Install and invoke hotplug callbacks */
        return cpuhp_setup_state(CPUHP_AP_MARCO_TIMER_STARTING,
-                                "AP_MARCO_TIMER_STARTING",
+                                "clockevents/marco:starting",
                                 sirfsoc_local_timer_starting_cpu,
                                 sirfsoc_local_timer_dying_cpu);
 }
index 3fe368b23d158d56563285052df9aecb869d3ef2..a51b6b64ecdf01ff61938608e89e9d24bcfab13a 100644 (file)
@@ -804,10 +804,10 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
 
        if (!etm_count++) {
                cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
-                                         "AP_ARM_CORESIGHT_STARTING",
+                                         "arm/coresight:starting",
                                          etm_starting_cpu, etm_dying_cpu);
                ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-                                               "AP_ARM_CORESIGHT_ONLINE",
+                                               "arm/coresight:online",
                                                etm_online_cpu, NULL);
                if (ret < 0)
                        goto err_arch_supported;
index 4db8d6a4d0cbbe0545e5724b7de75fbae0438c68..031480f2c34d0f68166e924b180167a7c943d4d9 100644 (file)
@@ -986,11 +986,11 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
                dev_err(dev, "ETM arch init failed\n");
 
        if (!etm4_count++) {
-               cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING,
-                                         "AP_ARM_CORESIGHT4_STARTING",
+               cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
+                                         "arm/coresight4:starting",
                                          etm4_starting_cpu, etm4_dying_cpu);
                ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
-                                               "AP_ARM_CORESIGHT4_ONLINE",
+                                               "arm/coresight4:online",
                                                etm4_online_cpu, NULL);
                if (ret < 0)
                        goto err_arch_supported;
@@ -1037,7 +1037,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
 err_arch_supported:
        if (--etm4_count == 0) {
-               cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT4_STARTING);
+               cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
                if (hp_online)
                        cpuhp_remove_state_nocalls(hp_online);
        }
index 8bcee65a0b8c92a22c8d49ac362656a19233e04f..eb0d4d41b15691721ee8b3a6da87d1fada27248a 100644 (file)
@@ -578,13 +578,13 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 #ifdef CONFIG_SMP
                set_smp_cross_call(armada_mpic_send_doorbell);
                cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
-                                         "AP_IRQ_ARMADA_XP_STARTING",
+                                         "irqchip/armada/ipi:starting",
                                          armada_xp_mpic_starting_cpu, NULL);
 #endif
        } else {
 #ifdef CONFIG_SMP
-               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
-                                         "AP_IRQ_ARMADA_CASC_STARTING",
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
+                                         "irqchip/armada/cascade:starting",
                                          mpic_cascaded_starting_cpu, NULL);
 #endif
                irq_set_chained_handler(parent_irq,
index d96b2c947e74e3edab3917551c64fbd1ced0f34c..e7463e3c08143acae3e8cc5682f918c6a0b07ebd 100644 (file)
@@ -245,7 +245,7 @@ bcm2836_arm_irqchip_smp_init(void)
 #ifdef CONFIG_SMP
        /* Unmask IPIs to the boot CPU. */
        cpuhp_setup_state(CPUHP_AP_IRQ_BCM2836_STARTING,
-                         "AP_IRQ_BCM2836_STARTING", bcm2836_cpu_starting,
+                         "irqchip/bcm2836:starting", bcm2836_cpu_starting,
                          bcm2836_cpu_dying);
 
        set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
index 26e1d7fafb1e088d3e7f389112a1799022bea3ab..c132f29322cc68a13ab76785d6c7d7b77bed542b 100644 (file)
@@ -632,9 +632,9 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
 static void gic_smp_init(void)
 {
        set_smp_cross_call(gic_raise_softirq);
-       cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GICV3_STARTING,
-                                 "AP_IRQ_GICV3_STARTING", gic_starting_cpu,
-                                 NULL);
+       cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
+                                 "irqchip/arm/gicv3:starting",
+                                 gic_starting_cpu, NULL);
 }
 
 static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
index d6c404b3584d5118799f8376d384c2f71b728a69..1b1df4f770bdefe0a16e0109624801f8af90f1f7 100644 (file)
@@ -1191,7 +1191,7 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
                set_smp_cross_call(gic_raise_softirq);
 #endif
                cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
-                                         "AP_IRQ_GIC_STARTING",
+                                         "irqchip/arm/gic:starting",
                                          gic_starting_cpu, NULL);
                set_handle_irq(gic_handle_irq);
                if (static_key_true(&supports_deactivate))
index 021b0e0833c1f0775c246272b8eeedd0134ddae7..c1b4ee955dbef6e05092efd36e4658ee7d91df4f 100644 (file)
@@ -407,7 +407,7 @@ hip04_of_init(struct device_node *node, struct device_node *parent)
        set_handle_irq(hip04_handle_irq);
 
        hip04_irq_dist_init(&hip04_data);
-       cpuhp_setup_state(CPUHP_AP_IRQ_HIP04_STARTING, "AP_IRQ_HIP04_STARTING",
+       cpuhp_setup_state(CPUHP_AP_IRQ_HIP04_STARTING, "irqchip/hip04:starting",
                          hip04_irq_starting_cpu, NULL);
        return 0;
 }
index 9719caf7437c3aa71dfa6423ec5b07221f8a2e51..a41896468cb328e89a44e13590a0ae611f413106 100644 (file)
@@ -127,7 +127,7 @@ static int __init ledtrig_cpu_init(void)
 
        register_syscore_ops(&ledtrig_cpu_syscore_ops);
 
-       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_LEDTRIG_STARTING",
+       ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "leds/trigger:starting",
                                ledtrig_online_cpu, ledtrig_prepare_down_cpu);
        if (ret < 0)
                pr_err("CPU hotplug notifier for ledtrig-cpu could not be registered: %d\n",
index 5deeda61d6d3df0531c8fb1431186dc6d7125ecc..4a105006ca637bc985698fa378ff6d14e1494b3f 100644 (file)
@@ -2484,13 +2484,13 @@ static __init int virtio_net_driver_init(void)
 {
        int ret;
 
-       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "AP_VIRT_NET_ONLINE",
+       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
                                      virtnet_cpu_online,
                                      virtnet_cpu_down_prep);
        if (ret < 0)
                goto out;
        virtionet_online = ret;
-       ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "VIRT_NET_DEAD",
+       ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
                                      NULL, virtnet_cpu_dead);
        if (ret)
                goto err_dead;
index b37b5729456663bc1639d7ed544634432d883470..6d9335865880e18bd3d8e6172e876fe38180a22a 100644 (file)
@@ -1084,7 +1084,7 @@ static int arm_pmu_hp_init(void)
        int ret;
 
        ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
-                                     "AP_PERF_ARM_STARTING",
+                                     "perf/arm/pmu:starting",
                                      arm_perf_starting_cpu, NULL);
        if (ret)
                pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
index 0990130821fa5ac8a3fc876fd5b055f81a31237a..c639d5a02656abf9678f1ef358c4166e5db04a66 100644 (file)
@@ -127,13 +127,6 @@ module_param_named(log_fka, bnx2fc_log_fka, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(log_fka, " Print message to kernel log when fcoe is "
        "initiating a FIP keep alive when debug logging is enabled.");
 
-static int bnx2fc_cpu_callback(struct notifier_block *nfb,
-                            unsigned long action, void *hcpu);
-/* notification function for CPU hotplug events */
-static struct notifier_block bnx2fc_cpu_notifier = {
-       .notifier_call = bnx2fc_cpu_callback,
-};
-
 static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport)
 {
        return ((struct bnx2fc_interface *)
@@ -2622,37 +2615,19 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
                kthread_stop(thread);
 }
 
-/**
- * bnx2fc_cpu_callback - Handler for CPU hotplug events
- *
- * @nfb:    The callback data block
- * @action: The event triggering the callback
- * @hcpu:   The index of the CPU that the event is for
- *
- * This creates or destroys per-CPU data for fcoe
- *
- * Returns NOTIFY_OK always.
- */
-static int bnx2fc_cpu_callback(struct notifier_block *nfb,
-                            unsigned long action, void *hcpu)
+
+static int bnx2fc_cpu_online(unsigned int cpu)
 {
-       unsigned cpu = (unsigned long)hcpu;
+       printk(PFX "CPU %x online: Create Rx thread\n", cpu);
+       bnx2fc_percpu_thread_create(cpu);
+       return 0;
+}
 
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               printk(PFX "CPU %x online: Create Rx thread\n", cpu);
-               bnx2fc_percpu_thread_create(cpu);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
-               bnx2fc_percpu_thread_destroy(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+static int bnx2fc_cpu_dead(unsigned int cpu)
+{
+       printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
+       bnx2fc_percpu_thread_destroy(cpu);
+       return 0;
 }
 
 static int bnx2fc_slave_configure(struct scsi_device *sdev)
@@ -2664,6 +2639,8 @@ static int bnx2fc_slave_configure(struct scsi_device *sdev)
        return 0;
 }
 
+static enum cpuhp_state bnx2fc_online_state;
+
 /**
  * bnx2fc_mod_init - module init entry point
  *
@@ -2724,21 +2701,31 @@ static int __init bnx2fc_mod_init(void)
                spin_lock_init(&p->fp_work_lock);
        }
 
-       cpu_notifier_register_begin();
+       get_online_cpus();
 
-       for_each_online_cpu(cpu) {
+       for_each_online_cpu(cpu)
                bnx2fc_percpu_thread_create(cpu);
-       }
 
-       /* Initialize per CPU interrupt thread */
-       __register_hotcpu_notifier(&bnx2fc_cpu_notifier);
+       rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+                                      "scsi/bnx2fc:online",
+                                      bnx2fc_cpu_online, NULL);
+       if (rc < 0)
+               goto stop_threads;
+       bnx2fc_online_state = rc;
 
-       cpu_notifier_register_done();
+       cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead",
+                                 NULL, bnx2fc_cpu_dead);
+       put_online_cpus();
 
        cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
 
        return 0;
 
+stop_threads:
+       for_each_online_cpu(cpu)
+               bnx2fc_percpu_thread_destroy(cpu);
+       put_online_cpus();
+       kthread_stop(l2_thread);
 free_wq:
        destroy_workqueue(bnx2fc_wq);
 release_bt:
@@ -2797,16 +2784,16 @@ static void __exit bnx2fc_mod_exit(void)
        if (l2_thread)
                kthread_stop(l2_thread);
 
-       cpu_notifier_register_begin();
-
+       get_online_cpus();
        /* Destroy per cpu threads */
        for_each_online_cpu(cpu) {
                bnx2fc_percpu_thread_destroy(cpu);
        }
 
-       __unregister_hotcpu_notifier(&bnx2fc_cpu_notifier);
+       cpuhp_remove_state_nocalls(bnx2fc_online_state);
+       cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD);
 
-       cpu_notifier_register_done();
+       put_online_cpus();
 
        destroy_workqueue(bnx2fc_wq);
        /*
index c8b410c24cf03215a588e41557b3eaf7faa46c13..86afc002814cd07bbc3b55b64ca31201208a514b 100644 (file)
@@ -70,14 +70,6 @@ u64 iscsi_error_mask = 0x00;
 
 DEFINE_PER_CPU(struct bnx2i_percpu_s, bnx2i_percpu);
 
-static int bnx2i_cpu_callback(struct notifier_block *nfb,
-                             unsigned long action, void *hcpu);
-/* notification function for CPU hotplug events */
-static struct notifier_block bnx2i_cpu_notifier = {
-       .notifier_call = bnx2i_cpu_callback,
-};
-
-
 /**
  * bnx2i_identify_device - identifies NetXtreme II device type
  * @hba:               Adapter structure pointer
@@ -461,41 +453,21 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu)
                kthread_stop(thread);
 }
 
-
-/**
- * bnx2i_cpu_callback - Handler for CPU hotplug events
- *
- * @nfb:       The callback data block
- * @action:    The event triggering the callback
- * @hcpu:      The index of the CPU that the event is for
- *
- * This creates or destroys per-CPU data for iSCSI
- *
- * Returns NOTIFY_OK always.
- */
-static int bnx2i_cpu_callback(struct notifier_block *nfb,
-                             unsigned long action, void *hcpu)
+static int bnx2i_cpu_online(unsigned int cpu)
 {
-       unsigned cpu = (unsigned long)hcpu;
+       pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu);
+       bnx2i_percpu_thread_create(cpu);
+       return 0;
+}
 
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               printk(KERN_INFO "bnx2i: CPU %x online: Create Rx thread\n",
-                       cpu);
-               bnx2i_percpu_thread_create(cpu);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               printk(KERN_INFO "CPU %x offline: Remove Rx thread\n", cpu);
-               bnx2i_percpu_thread_destroy(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+static int bnx2i_cpu_dead(unsigned int cpu)
+{
+       pr_info("CPU %x offline: Remove Rx thread\n", cpu);
+       bnx2i_percpu_thread_destroy(cpu);
+       return 0;
 }
 
+static enum cpuhp_state bnx2i_online_state;
 
 /**
  * bnx2i_mod_init - module init entry point
@@ -539,18 +511,28 @@ static int __init bnx2i_mod_init(void)
                p->iothread = NULL;
        }
 
-       cpu_notifier_register_begin();
+       get_online_cpus();
 
        for_each_online_cpu(cpu)
                bnx2i_percpu_thread_create(cpu);
 
-       /* Initialize per CPU interrupt thread */
-       __register_hotcpu_notifier(&bnx2i_cpu_notifier);
-
-       cpu_notifier_register_done();
+       err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+                                      "scsi/bnx2i:online",
+                                      bnx2i_cpu_online, NULL);
+       if (err < 0)
+               goto remove_threads;
+       bnx2i_online_state = err;
 
+       cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead",
+                                 NULL, bnx2i_cpu_dead);
+       put_online_cpus();
        return 0;
 
+remove_threads:
+       for_each_online_cpu(cpu)
+               bnx2i_percpu_thread_destroy(cpu);
+       put_online_cpus();
+       cnic_unregister_driver(CNIC_ULP_ISCSI);
 unreg_xport:
        iscsi_unregister_transport(&bnx2i_iscsi_transport);
 out:
@@ -587,14 +569,14 @@ static void __exit bnx2i_mod_exit(void)
        }
        mutex_unlock(&bnx2i_dev_lock);
 
-       cpu_notifier_register_begin();
+       get_online_cpus();
 
        for_each_online_cpu(cpu)
                bnx2i_percpu_thread_destroy(cpu);
 
-       __unregister_hotcpu_notifier(&bnx2i_cpu_notifier);
-
-       cpu_notifier_register_done();
+       cpuhp_remove_state_nocalls(bnx2i_online_state);
+       cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD);
+       put_online_cpus();
 
        iscsi_unregister_transport(&bnx2i_iscsi_transport);
        cnic_unregister_driver(CNIC_ULP_ISCSI);
index 19ead8d17e55fbfc20315aac9111d7da4a175b7d..5eda21d903e93dfc96702552d2de9deb7f01c734 100644 (file)
@@ -1612,30 +1612,29 @@ static int qedi_percpu_io_thread(void *arg)
        return 0;
 }
 
-static void qedi_percpu_thread_create(unsigned int cpu)
+static int qedi_cpu_online(unsigned int cpu)
 {
-       struct qedi_percpu_s *p;
+       struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
        struct task_struct *thread;
 
-       p = &per_cpu(qedi_percpu, cpu);
-
        thread = kthread_create_on_node(qedi_percpu_io_thread, (void *)p,
                                        cpu_to_node(cpu),
                                        "qedi_thread/%d", cpu);
-       if (likely(!IS_ERR(thread))) {
-               kthread_bind(thread, cpu);
-               p->iothread = thread;
-               wake_up_process(thread);
-       }
+       if (IS_ERR(thread))
+               return PTR_ERR(thread);
+
+       kthread_bind(thread, cpu);
+       p->iothread = thread;
+       wake_up_process(thread);
+       return 0;
 }
 
-static void qedi_percpu_thread_destroy(unsigned int cpu)
+static int qedi_cpu_offline(unsigned int cpu)
 {
-       struct qedi_percpu_s *p;
-       struct task_struct *thread;
+       struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
        struct qedi_work *work, *tmp;
+       struct task_struct *thread;
 
-       p = &per_cpu(qedi_percpu, cpu);
        spin_lock_bh(&p->p_work_lock);
        thread = p->iothread;
        p->iothread = NULL;
@@ -1650,35 +1649,9 @@ static void qedi_percpu_thread_destroy(unsigned int cpu)
        spin_unlock_bh(&p->p_work_lock);
        if (thread)
                kthread_stop(thread);
+       return 0;
 }
 
-static int qedi_cpu_callback(struct notifier_block *nfb,
-                            unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               QEDI_ERR(NULL, "CPU %d online.\n", cpu);
-               qedi_percpu_thread_create(cpu);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               QEDI_ERR(NULL, "CPU %d offline.\n", cpu);
-               qedi_percpu_thread_destroy(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block qedi_cpu_notifier = {
-       .notifier_call = qedi_cpu_callback,
-};
-
 void qedi_reset_host_mtu(struct qedi_ctx *qedi, u16 mtu)
 {
        struct qed_ll2_params params;
@@ -2038,6 +2011,8 @@ static struct pci_device_id qedi_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, qedi_pci_tbl);
 
+static enum cpuhp_state qedi_cpuhp_state;
+
 static struct pci_driver qedi_pci_driver = {
        .name = QEDI_MODULE_NAME,
        .id_table = qedi_pci_tbl,
@@ -2047,16 +2022,13 @@ static struct pci_driver qedi_pci_driver = {
 
 static int __init qedi_init(void)
 {
-       int rc = 0;
-       int ret;
        struct qedi_percpu_s *p;
-       unsigned int cpu = 0;
+       int cpu, rc = 0;
 
        qedi_ops = qed_get_iscsi_ops();
        if (!qedi_ops) {
                QEDI_ERR(NULL, "Failed to get qed iSCSI operations\n");
-               rc = -EINVAL;
-               goto exit_qedi_init_0;
+               return -EINVAL;
        }
 
 #ifdef CONFIG_DEBUG_FS
@@ -2070,15 +2042,6 @@ static int __init qedi_init(void)
                goto exit_qedi_init_1;
        }
 
-       register_hotcpu_notifier(&qedi_cpu_notifier);
-
-       ret = pci_register_driver(&qedi_pci_driver);
-       if (ret) {
-               QEDI_ERR(NULL, "Failed to register driver\n");
-               rc = -EINVAL;
-               goto exit_qedi_init_2;
-       }
-
        for_each_possible_cpu(cpu) {
                p = &per_cpu(qedi_percpu, cpu);
                INIT_LIST_HEAD(&p->work_list);
@@ -2086,11 +2049,22 @@ static int __init qedi_init(void)
                p->iothread = NULL;
        }
 
-       for_each_online_cpu(cpu)
-               qedi_percpu_thread_create(cpu);
+       rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/qedi:online",
+                              qedi_cpu_online, qedi_cpu_offline);
+       if (rc < 0)
+               goto exit_qedi_init_2;
+       qedi_cpuhp_state = rc;
 
-       return rc;
+       rc = pci_register_driver(&qedi_pci_driver);
+       if (rc) {
+               QEDI_ERR(NULL, "Failed to register driver\n");
+               goto exit_qedi_hp;
+       }
+
+       return 0;
 
+exit_qedi_hp:
+       cpuhp_remove_state(qedi_cpuhp_state);
 exit_qedi_init_2:
        iscsi_unregister_transport(&qedi_iscsi_transport);
 exit_qedi_init_1:
@@ -2098,19 +2072,13 @@ exit_qedi_init_1:
        qedi_dbg_exit();
 #endif
        qed_put_iscsi_ops();
-exit_qedi_init_0:
        return rc;
 }
 
 static void __exit qedi_cleanup(void)
 {
-       unsigned int cpu = 0;
-
-       for_each_online_cpu(cpu)
-               qedi_percpu_thread_destroy(cpu);
-
        pci_unregister_driver(&qedi_pci_driver);
-       unregister_hotcpu_notifier(&qedi_cpu_notifier);
+       cpuhp_remove_state(qedi_cpuhp_state);
        iscsi_unregister_transport(&qedi_iscsi_transport);
 
 #ifdef CONFIG_DEBUG_FS
index 6b9cf06e8df2b91824294b4bc9ee2e5870191ff4..427e2198bb9e85b8a11693fd7fb1495f2799aa7e 100644 (file)
@@ -967,48 +967,38 @@ cfs_cpt_table_create_pattern(char *pattern)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int
-cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-       bool warn;
-
-       switch (action) {
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               spin_lock(&cpt_data.cpt_lock);
-               cpt_data.cpt_version++;
-               spin_unlock(&cpt_data.cpt_lock);
-               /* Fall through */
-       default:
-               if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
-                       CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
-                              cpu, action);
-                       break;
-               }
+static enum cpuhp_state lustre_cpu_online;
 
-               mutex_lock(&cpt_data.cpt_mutex);
-               /* if all HTs in a core are offline, it may break affinity */
-               cpumask_copy(cpt_data.cpt_cpumask,
-                            topology_sibling_cpumask(cpu));
-               warn = cpumask_any_and(cpt_data.cpt_cpumask,
-                                      cpu_online_mask) >= nr_cpu_ids;
-               mutex_unlock(&cpt_data.cpt_mutex);
-               CDEBUG(warn ? D_WARNING : D_INFO,
-                      "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
-                      cpu, action);
-       }
+static void cfs_cpu_incr_cpt_version(void)
+{
+       spin_lock(&cpt_data.cpt_lock);
+       cpt_data.cpt_version++;
+       spin_unlock(&cpt_data.cpt_lock);
+}
 
-       return NOTIFY_OK;
+static int cfs_cpu_online(unsigned int cpu)
+{
+       cfs_cpu_incr_cpt_version();
+       return 0;
 }
 
-static struct notifier_block cfs_cpu_notifier = {
-       .notifier_call  = cfs_cpu_notify,
-       .priority       = 0
-};
+static int cfs_cpu_dead(unsigned int cpu)
+{
+       bool warn;
+
+       cfs_cpu_incr_cpt_version();
 
+       mutex_lock(&cpt_data.cpt_mutex);
+       /* if all HTs in a core are offline, it may break affinity */
+       cpumask_copy(cpt_data.cpt_cpumask, topology_sibling_cpumask(cpu));
+       warn = cpumask_any_and(cpt_data.cpt_cpumask,
+                              cpu_online_mask) >= nr_cpu_ids;
+       mutex_unlock(&cpt_data.cpt_mutex);
+       CDEBUG(warn ? D_WARNING : D_INFO,
+              "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u]\n",
+              cpu);
+       return 0;
+}
 #endif
 
 void
@@ -1018,7 +1008,9 @@ cfs_cpu_fini(void)
                cfs_cpt_table_free(cfs_cpt_table);
 
 #ifdef CONFIG_HOTPLUG_CPU
-       unregister_hotcpu_notifier(&cfs_cpu_notifier);
+       if (lustre_cpu_online > 0)
+               cpuhp_remove_state_nocalls(lustre_cpu_online);
+       cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
 #endif
        if (cpt_data.cpt_cpumask)
                LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
@@ -1027,6 +1019,8 @@ cfs_cpu_fini(void)
 int
 cfs_cpu_init(void)
 {
+       int ret = 0;
+
        LASSERT(!cfs_cpt_table);
 
        memset(&cpt_data, 0, sizeof(cpt_data));
@@ -1041,8 +1035,19 @@ cfs_cpu_init(void)
        mutex_init(&cpt_data.cpt_mutex);
 
 #ifdef CONFIG_HOTPLUG_CPU
-       register_hotcpu_notifier(&cfs_cpu_notifier);
+       ret = cpuhp_setup_state_nocalls(CPUHP_LUSTRE_CFS_DEAD,
+                                       "staging/lustre/cfe:dead", NULL,
+                                       cfs_cpu_dead);
+       if (ret < 0)
+               goto failed;
+       ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+                                       "staging/lustre/cfe:online",
+                                       cfs_cpu_online, NULL);
+       if (ret < 0)
+               goto failed;
+       lustre_cpu_online = ret;
 #endif
+       ret = -EINVAL;
 
        if (*cpu_pattern) {
                cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
@@ -1075,7 +1080,7 @@ cfs_cpu_init(void)
 
  failed:
        cfs_cpu_fini();
-       return -1;
+       return ret;
 }
 
 #endif
index 7ef27c6ed72fb397167aaf86384da4adf947d6e6..c03f9c86c7e37d580032d1553e829a6f14ef2014 100644 (file)
@@ -445,7 +445,7 @@ int __init xen_evtchn_fifo_init(void)
        evtchn_ops = &evtchn_ops_fifo;
 
        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
-                                 "CPUHP_XEN_EVTCHN_PREPARE",
+                                 "xen/evtchn:prepare",
                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
 out:
        put_cpu();
index 09807c2ce3287940151a674255acbf33238b7dc8..21f9c74496e75e33e300624256e9c69f22f439f3 100644 (file)
@@ -57,9 +57,6 @@ struct notifier_block;
 
 #define CPU_ONLINE             0x0002 /* CPU (unsigned)v is up */
 #define CPU_UP_PREPARE         0x0003 /* CPU (unsigned)v coming up */
-#define CPU_UP_CANCELED                0x0004 /* CPU (unsigned)v NOT coming up */
-#define CPU_DOWN_PREPARE       0x0005 /* CPU (unsigned)v going down */
-#define CPU_DOWN_FAILED                0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD               0x0007 /* CPU (unsigned)v dead */
 #define CPU_POST_DEAD          0x0009 /* CPU (unsigned)v dead, cpu_hotplug
                                        * lock is dropped */
@@ -80,80 +77,14 @@ struct notifier_block;
 
 #ifdef CONFIG_SMP
 extern bool cpuhp_tasks_frozen;
-/* Need to know about CPUs going up/down? */
-#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
-#define cpu_notifier(fn, pri) {                                        \
-       static struct notifier_block fn##_nb =                  \
-               { .notifier_call = fn, .priority = pri };       \
-       register_cpu_notifier(&fn##_nb);                        \
-}
-
-#define __cpu_notifier(fn, pri) {                              \
-       static struct notifier_block fn##_nb =                  \
-               { .notifier_call = fn, .priority = pri };       \
-       __register_cpu_notifier(&fn##_nb);                      \
-}
-
-extern int register_cpu_notifier(struct notifier_block *nb);
-extern int __register_cpu_notifier(struct notifier_block *nb);
-extern void unregister_cpu_notifier(struct notifier_block *nb);
-extern void __unregister_cpu_notifier(struct notifier_block *nb);
-
-#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */
-#define cpu_notifier(fn, pri)  do { (void)(fn); } while (0)
-#define __cpu_notifier(fn, pri)        do { (void)(fn); } while (0)
-
-static inline int register_cpu_notifier(struct notifier_block *nb)
-{
-       return 0;
-}
-
-static inline int __register_cpu_notifier(struct notifier_block *nb)
-{
-       return 0;
-}
-
-static inline void unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
-static inline void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-#endif
-
 int cpu_up(unsigned int cpu);
 void notify_cpu_starting(unsigned int cpu);
 extern void cpu_maps_update_begin(void);
 extern void cpu_maps_update_done(void);
 
-#define cpu_notifier_register_begin    cpu_maps_update_begin
-#define cpu_notifier_register_done     cpu_maps_update_done
-
 #else  /* CONFIG_SMP */
 #define cpuhp_tasks_frozen     0
 
-#define cpu_notifier(fn, pri)  do { (void)(fn); } while (0)
-#define __cpu_notifier(fn, pri)        do { (void)(fn); } while (0)
-
-static inline int register_cpu_notifier(struct notifier_block *nb)
-{
-       return 0;
-}
-
-static inline int __register_cpu_notifier(struct notifier_block *nb)
-{
-       return 0;
-}
-
-static inline void unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
-static inline void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-}
-
 static inline void cpu_maps_update_begin(void)
 {
 }
@@ -162,14 +93,6 @@ static inline void cpu_maps_update_done(void)
 {
 }
 
-static inline void cpu_notifier_register_begin(void)
-{
-}
-
-static inline void cpu_notifier_register_done(void)
-{
-}
-
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -182,12 +105,6 @@ extern void get_online_cpus(void);
 extern void put_online_cpus(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
-#define hotcpu_notifier(fn, pri)       cpu_notifier(fn, pri)
-#define __hotcpu_notifier(fn, pri)     __cpu_notifier(fn, pri)
-#define register_hotcpu_notifier(nb)   register_cpu_notifier(nb)
-#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb)
-#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
-#define __unregister_hotcpu_notifier(nb)       __unregister_cpu_notifier(nb)
 void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
@@ -199,13 +116,6 @@ static inline void cpu_hotplug_done(void) {}
 #define put_online_cpus()      do { } while (0)
 #define cpu_hotplug_disable()  do { } while (0)
 #define cpu_hotplug_enable()   do { } while (0)
-#define hotcpu_notifier(fn, pri)       do { (void)(fn); } while (0)
-#define __hotcpu_notifier(fn, pri)     do { (void)(fn); } while (0)
-/* These aren't inline functions due to a GCC bug. */
-#define register_hotcpu_notifier(nb)   ({ (void)(nb); 0; })
-#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
-#define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
-#define __unregister_hotcpu_notifier(nb)       ({ (void)(nb); })
 #endif         /* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_PM_SLEEP_SMP
index 2ab7bf53d529acf95fbdf7ddbc338b0dae6b5755..20bfefbe75941627c25c30621d4d9b09606c720c 100644 (file)
@@ -41,6 +41,9 @@ enum cpuhp_state {
        CPUHP_NET_DEV_DEAD,
        CPUHP_PCI_XGENE_DEAD,
        CPUHP_IOMMU_INTEL_DEAD,
+       CPUHP_LUSTRE_CFS_DEAD,
+       CPUHP_SCSI_BNX2FC_DEAD,
+       CPUHP_SCSI_BNX2I_DEAD,
        CPUHP_WORKQUEUE_PREP,
        CPUHP_POWER_NUMA_PREPARE,
        CPUHP_HRTIMERS_PREPARE,
@@ -56,7 +59,6 @@ enum cpuhp_state {
        CPUHP_POWERPC_MMU_CTX_PREPARE,
        CPUHP_XEN_PREPARE,
        CPUHP_XEN_EVTCHN_PREPARE,
-       CPUHP_NOTIFY_PREPARE,
        CPUHP_ARM_SHMOBILE_SCU_PREPARE,
        CPUHP_SH_SH3X_PREPARE,
        CPUHP_BLK_MQ_PREPARE,
@@ -71,7 +73,6 @@ enum cpuhp_state {
        CPUHP_KVM_PPC_BOOK3S_PREPARE,
        CPUHP_ZCOMP_PREPARE,
        CPUHP_TIMERS_DEAD,
-       CPUHP_NOTF_ERR_INJ_PREPARE,
        CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BRINGUP_CPU,
        CPUHP_AP_IDLE_DEAD,
@@ -79,10 +80,8 @@ enum cpuhp_state {
        CPUHP_AP_SCHED_STARTING,
        CPUHP_AP_RCUTREE_DYING,
        CPUHP_AP_IRQ_GIC_STARTING,
-       CPUHP_AP_IRQ_GICV3_STARTING,
        CPUHP_AP_IRQ_HIP04_STARTING,
        CPUHP_AP_IRQ_ARMADA_XP_STARTING,
-       CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
        CPUHP_AP_IRQ_BCM2836_STARTING,
        CPUHP_AP_ARM_MVEBU_COHERENCY,
        CPUHP_AP_PERF_X86_UNCORE_STARTING,
@@ -118,7 +117,6 @@ enum cpuhp_state {
        CPUHP_AP_DUMMY_TIMER_STARTING,
        CPUHP_AP_ARM_XEN_STARTING,
        CPUHP_AP_ARM_CORESIGHT_STARTING,
-       CPUHP_AP_ARM_CORESIGHT4_STARTING,
        CPUHP_AP_ARM64_ISNDEP_STARTING,
        CPUHP_AP_SMPCFD_DYING,
        CPUHP_AP_X86_TBOOT_DYING,
@@ -142,7 +140,6 @@ enum cpuhp_state {
        CPUHP_AP_PERF_ARM_L2X0_ONLINE,
        CPUHP_AP_WORKQUEUE_ONLINE,
        CPUHP_AP_RCUTREE_ONLINE,
-       CPUHP_AP_NOTIFY_ONLINE,
        CPUHP_AP_ONLINE_DYN,
        CPUHP_AP_ONLINE_DYN_END         = CPUHP_AP_ONLINE_DYN + 30,
        CPUHP_AP_X86_HPET_ONLINE,
index 4424784ac37495b38d735f7d86fd6b574b2fa16f..fe6b4036664a9a7c82fe4a22be93288928eceac1 100644 (file)
@@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
        return ptl;
 }
 
+extern void __init pagecache_init(void);
+
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
                unsigned long zone_start_pfn, unsigned long *zholes_size);
index 74e4dda912384e3ec15df7c3c15449795cda634e..c56b39890a412abfec4acc31e404781215ae3ff6 100644 (file)
@@ -73,6 +73,7 @@
  */
 enum pageflags {
        PG_locked,              /* Page is locked. Don't touch. */
+       PG_waiters,             /* Page has waiters, check its waitqueue */
        PG_error,
        PG_referenced,
        PG_uptodate,
@@ -87,7 +88,6 @@ enum pageflags {
        PG_private_2,           /* If pagecache, has fs aux data */
        PG_writeback,           /* Page is under writeback */
        PG_head,                /* A head page */
-       PG_swapcache,           /* Swap page: swp_entry_t in private */
        PG_mappedtodisk,        /* Has blocks allocated on-disk */
        PG_reclaim,             /* To be reclaimed asap */
        PG_swapbacked,          /* Page is backed by RAM/swap */
@@ -110,6 +110,9 @@ enum pageflags {
        /* Filesystems */
        PG_checked = PG_owner_priv_1,
 
+       /* SwapBacked */
+       PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */
+
        /* Two page bits are conscripted by FS-Cache to maintain local caching
         * state.  These bits are set on pages belonging to the netfs's inodes
         * when those inodes are being locally cached.
@@ -167,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
  *     for compound page all operations related to the page flag applied to
  *     head page.
  *
+ * PF_ONLY_HEAD:
+ *     for compound page, callers only ever operate on the head page.
+ *
  * PF_NO_TAIL:
  *     modifications of the page flag must be done on small or head pages,
  *     checks can be done on tail pages too.
@@ -176,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
  */
 #define PF_ANY(page, enforce)  page
 #define PF_HEAD(page, enforce) compound_head(page)
+#define PF_ONLY_HEAD(page, enforce) ({                                 \
+               VM_BUG_ON_PGFLAGS(PageTail(page), page);                \
+               page;})
 #define PF_NO_TAIL(page, enforce) ({                                   \
                VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);     \
                compound_head(page);})
@@ -253,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
        TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
+PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
 PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
 PAGEFLAG(Referenced, referenced, PF_HEAD)
        TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -314,7 +324,13 @@ PAGEFLAG_FALSE(HighMem)
 #endif
 
 #ifdef CONFIG_SWAP
-PAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+static __always_inline int PageSwapCache(struct page *page)
+{
+       return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+
+}
+SETPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
+CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_COMPOUND)
 #else
 PAGEFLAG_FALSE(SwapCache)
 #endif
@@ -701,12 +717,12 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
  * Flags checked when a page is freed.  Pages being freed should not have
  * these flags set.  It they are, there is a problem.
  */
-#define PAGE_FLAGS_CHECK_AT_FREE \
-       (1UL << PG_lru   | 1UL << PG_locked    | \
-        1UL << PG_private | 1UL << PG_private_2 | \
-        1UL << PG_writeback | 1UL << PG_reserved | \
-        1UL << PG_slab  | 1UL << PG_swapcache | 1UL << PG_active | \
-        1UL << PG_unevictable | __PG_MLOCKED)
+#define PAGE_FLAGS_CHECK_AT_FREE                               \
+       (1UL << PG_lru          | 1UL << PG_locked      |       \
+        1UL << PG_private      | 1UL << PG_private_2   |       \
+        1UL << PG_writeback    | 1UL << PG_reserved    |       \
+        1UL << PG_slab         | 1UL << PG_active      |       \
+        1UL << PG_unevictable  | __PG_MLOCKED)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
@@ -735,6 +751,7 @@ static inline int page_has_private(struct page *page)
 
 #undef PF_ANY
 #undef PF_HEAD
+#undef PF_ONLY_HEAD
 #undef PF_NO_TAIL
 #undef PF_NO_COMPOUND
 #endif /* !__GENERATING_BOUNDS_H */
index f29f80f81dbf93b76c88003f4fbc27caa1c51576..324c8dbad1e13d049a7fed3e9ccac1210c4c5ed8 100644 (file)
@@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
  * and for filesystems which need to wait on PG_private.
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
-
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable_timeout(struct page *page,
-                                            int bit_nr, unsigned long timeout);
-
-static inline int wait_on_page_locked_killable(struct page *page)
-{
-       if (!PageLocked(page))
-               return 0;
-       return wait_on_page_bit_killable(compound_head(page), PG_locked);
-}
+extern void wake_up_page_bit(struct page *page, int bit_nr);
 
-extern wait_queue_head_t *page_waitqueue(struct page *page);
 static inline void wake_up_page(struct page *page, int bit)
 {
-       __wake_up_bit(page_waitqueue(page), &page->flags, bit);
+       if (!PageWaiters(page))
+               return;
+       wake_up_page_bit(page, bit);
 }
 
 /* 
@@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
                wait_on_page_bit(compound_head(page), PG_locked);
 }
 
+static inline int wait_on_page_locked_killable(struct page *page)
+{
+       if (!PageLocked(page))
+               return 0;
+       return wait_on_page_bit_killable(compound_head(page), PG_locked);
+}
+
 /* 
  * Wait for a page to complete writeback
  */
index c78f9f0920b51b61bc8d954438e02eca5b4a6513..5527d910ba3d12ee622cd23bd5aa8f62f2926043 100644 (file)
@@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
 void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
-void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
index 5a81ab48a2fb2921328e098c388e77d58a765147..9e687ca9a307b1344889186e46dd5e62a85f614f 100644 (file)
@@ -81,6 +81,7 @@
 
 #define __def_pageflag_names                                           \
        {1UL << PG_locked,              "locked"        },              \
+       {1UL << PG_waiters,             "waiters"       },              \
        {1UL << PG_error,               "error"         },              \
        {1UL << PG_referenced,          "referenced"    },              \
        {1UL << PG_uptodate,            "uptodate"      },              \
@@ -95,7 +96,6 @@
        {1UL << PG_private_2,           "private_2"     },              \
        {1UL << PG_writeback,           "writeback"     },              \
        {1UL << PG_head,                "head"          },              \
-       {1UL << PG_swapcache,           "swapcache"     },              \
        {1UL << PG_mappedtodisk,        "mappedtodisk"  },              \
        {1UL << PG_reclaim,             "reclaim"       },              \
        {1UL << PG_swapbacked,          "swapbacked"    },              \
index c81c9fa21bc770896c737fd408ef372e37d79c1f..b0c9d6facef9a5aced55d1443b40029a660011e8 100644 (file)
@@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
        security_init();
        dbg_late_init();
        vfs_caches_init();
+       pagecache_init();
        signals_init();
-       /* rootfs populating might need page-writeback */
-       page_writeback_init();
        proc_root_init();
        nsfs_init();
        cpuset_init();
index 5339aca811d2289690198119206eaa42d959452f..042fd7e8e03098882b165787b163a597c4c92aa8 100644 (file)
@@ -183,23 +183,16 @@ EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
 /*
  * The following two APIs (cpu_maps_update_begin/done) must be used when
  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
- * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
- * hotplug callback (un)registration performed using __register_cpu_notifier()
- * or __unregister_cpu_notifier().
  */
 void cpu_maps_update_begin(void)
 {
        mutex_lock(&cpu_add_remove_lock);
 }
-EXPORT_SYMBOL(cpu_notifier_register_begin);
 
 void cpu_maps_update_done(void)
 {
        mutex_unlock(&cpu_add_remove_lock);
 }
-EXPORT_SYMBOL(cpu_notifier_register_done);
-
-static RAW_NOTIFIER_HEAD(cpu_chain);
 
 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  * Should always be manipulated under cpu_add_remove_lock
@@ -349,66 +342,7 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-/* Need to know about CPUs going up/down? */
-int register_cpu_notifier(struct notifier_block *nb)
-{
-       int ret;
-       cpu_maps_update_begin();
-       ret = raw_notifier_chain_register(&cpu_chain, nb);
-       cpu_maps_update_done();
-       return ret;
-}
-
-int __register_cpu_notifier(struct notifier_block *nb)
-{
-       return raw_notifier_chain_register(&cpu_chain, nb);
-}
-
-static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
-                       int *nr_calls)
-{
-       unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
-       void *hcpu = (void *)(long)cpu;
-
-       int ret;
-
-       ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
-                                       nr_calls);
-
-       return notifier_to_errno(ret);
-}
-
-static int cpu_notify(unsigned long val, unsigned int cpu)
-{
-       return __cpu_notify(val, cpu, -1, NULL);
-}
-
-static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
-{
-       BUG_ON(cpu_notify(val, cpu));
-}
-
 /* Notifier wrappers for transitioning to state machine */
-static int notify_prepare(unsigned int cpu)
-{
-       int nr_calls = 0;
-       int ret;
-
-       ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
-       if (ret) {
-               nr_calls--;
-               printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
-                               __func__, cpu);
-               __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
-       }
-       return ret;
-}
-
-static int notify_online(unsigned int cpu)
-{
-       cpu_notify(CPU_ONLINE, cpu);
-       return 0;
-}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
@@ -433,10 +367,8 @@ static int bringup_cpu(unsigned int cpu)
        /* Arch-specific enabling code. */
        ret = __cpu_up(cpu, idle);
        irq_unlock_sparse();
-       if (ret) {
-               cpu_notify(CPU_UP_CANCELED, cpu);
+       if (ret)
                return ret;
-       }
        ret = bringup_wait_for_ap(cpu);
        BUG_ON(!cpu_online(cpu));
        return ret;
@@ -565,11 +497,6 @@ static void cpuhp_thread_fun(unsigned int cpu)
                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
 
                undo_cpu_down(cpu, st);
-               /*
-                * This is a momentary workaround to keep the notifier users
-                * happy. Will go away once we got rid of the notifiers.
-                */
-               cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
                st->rollback = false;
        } else {
                /* Cannot happen .... */
@@ -659,22 +586,6 @@ void __init cpuhp_threads_init(void)
        kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
-EXPORT_SYMBOL(register_cpu_notifier);
-EXPORT_SYMBOL(__register_cpu_notifier);
-void unregister_cpu_notifier(struct notifier_block *nb)
-{
-       cpu_maps_update_begin();
-       raw_notifier_chain_unregister(&cpu_chain, nb);
-       cpu_maps_update_done();
-}
-EXPORT_SYMBOL(unregister_cpu_notifier);
-
-void __unregister_cpu_notifier(struct notifier_block *nb)
-{
-       raw_notifier_chain_unregister(&cpu_chain, nb);
-}
-EXPORT_SYMBOL(__unregister_cpu_notifier);
-
 #ifdef CONFIG_HOTPLUG_CPU
 /**
  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
@@ -741,20 +652,6 @@ static inline void check_for_tasks(int dead_cpu)
        read_unlock(&tasklist_lock);
 }
 
-static int notify_down_prepare(unsigned int cpu)
-{
-       int err, nr_calls = 0;
-
-       err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
-       if (err) {
-               nr_calls--;
-               __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
-               pr_warn("%s: attempt to take down CPU %u failed\n",
-                               __func__, cpu);
-       }
-       return err;
-}
-
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
@@ -833,13 +730,6 @@ static int takedown_cpu(unsigned int cpu)
        return 0;
 }
 
-static int notify_dead(unsigned int cpu)
-{
-       cpu_notify_nofail(CPU_DEAD, cpu);
-       check_for_tasks(cpu);
-       return 0;
-}
-
 static void cpuhp_complete_idle_dead(void *arg)
 {
        struct cpuhp_cpu_state *st = arg;
@@ -863,9 +753,7 @@ void cpuhp_report_idle_dead(void)
 }
 
 #else
-#define notify_down_prepare    NULL
 #define takedown_cpu           NULL
-#define notify_dead            NULL
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -924,9 +812,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
        hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
 out:
        cpu_hotplug_done();
-       /* This post dead nonsense must die */
-       if (!ret && hasdied)
-               cpu_notify_nofail(CPU_POST_DEAD, cpu);
        return ret;
 }
 
@@ -1291,17 +1176,6 @@ static struct cpuhp_step cpuhp_bp_states[] = {
                .startup.single         = rcutree_prepare_cpu,
                .teardown.single        = rcutree_dead_cpu,
        },
-       /*
-        * Preparatory and dead notifiers. Will be replaced once the notifiers
-        * are converted to states.
-        */
-       [CPUHP_NOTIFY_PREPARE] = {
-               .name                   = "notify:prepare",
-               .startup.single         = notify_prepare,
-               .teardown.single        = notify_dead,
-               .skip_onerr             = true,
-               .cant_stop              = true,
-       },
        /*
         * On the tear-down path, timers_dead_cpu() must be invoked
         * before blk_mq_queue_reinit_notify() from notify_dead(),
@@ -1391,17 +1265,6 @@ static struct cpuhp_step cpuhp_ap_states[] = {
                .startup.single         = rcutree_online_cpu,
                .teardown.single        = rcutree_offline_cpu,
        },
-
-       /*
-        * Online/down_prepare notifiers. Will be removed once the notifiers
-        * are converted to states.
-        */
-       [CPUHP_AP_NOTIFY_ONLINE] = {
-               .name                   = "notify:online",
-               .startup.single         = notify_online,
-               .teardown.single        = notify_down_prepare,
-               .skip_onerr             = true,
-       },
 #endif
        /*
         * The dynamically registered state space is here
@@ -1432,23 +1295,53 @@ static int cpuhp_cb_check(enum cpuhp_state state)
        return 0;
 }
 
-static void cpuhp_store_callbacks(enum cpuhp_state state,
-                                 const char *name,
-                                 int (*startup)(unsigned int cpu),
-                                 int (*teardown)(unsigned int cpu),
-                                 bool multi_instance)
+/*
+ * Returns a free for dynamic slot assignment of the Online state. The states
+ * are protected by the cpuhp_slot_states mutex and an empty slot is identified
+ * by having no name assigned.
+ */
+static int cpuhp_reserve_state(enum cpuhp_state state)
+{
+       enum cpuhp_state i;
+
+       for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
+               if (!cpuhp_ap_states[i].name)
+                       return i;
+       }
+       WARN(1, "No more dynamic states available for CPU hotplug\n");
+       return -ENOSPC;
+}
+
+static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
+                                int (*startup)(unsigned int cpu),
+                                int (*teardown)(unsigned int cpu),
+                                bool multi_instance)
 {
        /* (Un)Install the callbacks for further cpu hotplug operations */
        struct cpuhp_step *sp;
+       int ret = 0;
 
        mutex_lock(&cpuhp_state_mutex);
+
+       if (state == CPUHP_AP_ONLINE_DYN) {
+               ret = cpuhp_reserve_state(state);
+               if (ret < 0)
+                       goto out;
+               state = ret;
+       }
        sp = cpuhp_get_step(state);
+       if (name && sp->name) {
+               ret = -EBUSY;
+               goto out;
+       }
        sp->startup.single = startup;
        sp->teardown.single = teardown;
        sp->name = name;
        sp->multi_instance = multi_instance;
        INIT_HLIST_HEAD(&sp->list);
+out:
        mutex_unlock(&cpuhp_state_mutex);
+       return ret;
 }
 
 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
@@ -1509,29 +1402,6 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
        }
 }
 
-/*
- * Returns a free for dynamic slot assignment of the Online state. The states
- * are protected by the cpuhp_slot_states mutex and an empty slot is identified
- * by having no name assigned.
- */
-static int cpuhp_reserve_state(enum cpuhp_state state)
-{
-       enum cpuhp_state i;
-
-       mutex_lock(&cpuhp_state_mutex);
-       for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
-               if (cpuhp_ap_states[i].name)
-                       continue;
-
-               cpuhp_ap_states[i].name = "Reserved";
-               mutex_unlock(&cpuhp_state_mutex);
-               return i;
-       }
-       mutex_unlock(&cpuhp_state_mutex);
-       WARN(1, "No more dynamic states available for CPU hotplug\n");
-       return -ENOSPC;
-}
-
 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
                               bool invoke)
 {
@@ -1580,11 +1450,13 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
 
 /**
  * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
- * @state:     The state to setup
- * @invoke:    If true, the startup function is invoked for cpus where
- *             cpu state >= @state
- * @startup:   startup callback function
- * @teardown:  teardown callback function
+ * @state:             The state to setup
+ * @invoke:            If true, the startup function is invoked for cpus where
+ *                     cpu state >= @state
+ * @startup:           startup callback function
+ * @teardown:          teardown callback function
+ * @multi_instance:    State is set up for multiple instances which get
+ *                     added afterwards.
  *
  * Returns:
  *   On success:
@@ -1599,25 +1471,16 @@ int __cpuhp_setup_state(enum cpuhp_state state,
                        bool multi_instance)
 {
        int cpu, ret = 0;
-       int dyn_state = 0;
 
        if (cpuhp_cb_check(state) || !name)
                return -EINVAL;
 
        get_online_cpus();
 
-       /* currently assignments for the ONLINE state are possible */
-       if (state == CPUHP_AP_ONLINE_DYN) {
-               dyn_state = 1;
-               ret = cpuhp_reserve_state(state);
-               if (ret < 0)
-                       goto out;
-               state = ret;
-       }
+       ret = cpuhp_store_callbacks(state, name, startup, teardown,
+                                   multi_instance);
 
-       cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
-
-       if (!invoke || !startup)
+       if (ret || !invoke || !startup)
                goto out;
 
        /*
@@ -1641,7 +1504,11 @@ int __cpuhp_setup_state(enum cpuhp_state state,
        }
 out:
        put_online_cpus();
-       if (!ret && dyn_state)
+       /*
+        * If the requested state is CPUHP_AP_ONLINE_DYN, return the
+        * dynamically allocated state in case of success.
+        */
+       if (!ret && state == CPUHP_AP_ONLINE_DYN)
                return state;
        return ret;
 }
index cb66a46488401b6a99091b489df029c3035a79dd..b06848a104e6940e128e63a5d3c8c7fb39ffaf8b 100644 (file)
@@ -1538,30 +1538,6 @@ config NOTIFIER_ERROR_INJECTION
 
          Say N if unsure.
 
-config CPU_NOTIFIER_ERROR_INJECT
-       tristate "CPU notifier error injection module"
-       depends on HOTPLUG_CPU && NOTIFIER_ERROR_INJECTION
-       help
-         This option provides a kernel module that can be used to test
-         the error handling of the cpu notifiers by injecting artificial
-         errors to CPU notifier chain callbacks.  It is controlled through
-         debugfs interface under /sys/kernel/debug/notifier-error-inject/cpu
-
-         If the notifier call chain should be failed with some events
-         notified, write the error code to "actions/<notifier event>/error".
-
-         Example: Inject CPU offline error (-1 == -EPERM)
-
-         # cd /sys/kernel/debug/notifier-error-inject/cpu
-         # echo -1 > actions/CPU_DOWN_PREPARE/error
-         # echo 0 > /sys/devices/system/cpu/cpu1/online
-         bash: echo: write error: Operation not permitted
-
-         To compile this code as a module, choose M here: the module will
-         be called cpu-notifier-error-inject.
-
-         If unsure, say N.
-
 config PM_NOTIFIER_ERROR_INJECT
        tristate "PM notifier error injection module"
        depends on PM && NOTIFIER_ERROR_INJECTION
index 50144a3aeebdb8dfcadca9ec735a1b9012fc518d..bc4073a8cd08da8377053c54a09e61f753a3fc7b 100644 (file)
@@ -128,7 +128,6 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
-obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
 obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
deleted file mode 100644 (file)
index 0e2c9a1..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-
-#include "notifier-error-inject.h"
-
-static int priority;
-module_param(priority, int, 0);
-MODULE_PARM_DESC(priority, "specify cpu notifier priority");
-
-#define UP_PREPARE 0
-#define UP_PREPARE_FROZEN 0
-#define DOWN_PREPARE 0
-#define DOWN_PREPARE_FROZEN 0
-
-static struct notifier_err_inject cpu_notifier_err_inject = {
-       .actions = {
-               { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
-               { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
-               { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
-               { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
-               {}
-       }
-};
-
-static int notf_err_handle(struct notifier_err_inject_action *action)
-{
-       int ret;
-
-       ret = action->error;
-       if (ret)
-               pr_info("Injecting error (%d) to %s\n", ret, action->name);
-       return ret;
-}
-
-static int notf_err_inj_up_prepare(unsigned int cpu)
-{
-       if (!cpuhp_tasks_frozen)
-               return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
-       else
-               return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
-}
-
-static int notf_err_inj_dead(unsigned int cpu)
-{
-       if (!cpuhp_tasks_frozen)
-               return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
-       else
-               return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
-}
-
-static struct dentry *dir;
-
-static int err_inject_init(void)
-{
-       int err;
-
-       dir = notifier_err_inject_init("cpu", notifier_err_inject_dir,
-                                       &cpu_notifier_err_inject, priority);
-       if (IS_ERR(dir))
-               return PTR_ERR(dir);
-
-       err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
-                                       "cpu-err-notif:prepare",
-                                       notf_err_inj_up_prepare,
-                                       notf_err_inj_dead);
-       if (err)
-               debugfs_remove_recursive(dir);
-
-       return err;
-}
-
-static void err_inject_exit(void)
-{
-       cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
-       debugfs_remove_recursive(dir);
-}
-
-module_init(err_inject_init);
-module_exit(err_inject_exit);
-
-MODULE_DESCRIPTION("CPU notifier error injection module");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
index 32be3c8f3a112d2fd7b3be42b327d727bf240507..82f26cde830c4b70df30cfa47c7e21dbe6d05a7f 100644 (file)
@@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-wait_queue_head_t *page_waitqueue(struct page *page)
+#define PAGE_WAIT_TABLE_BITS 8
+#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
+static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+static wait_queue_head_t *page_waitqueue(struct page *page)
 {
-       return bit_waitqueue(page, 0);
+       return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
 }
-EXPORT_SYMBOL(page_waitqueue);
 
-void wait_on_page_bit(struct page *page, int bit_nr)
+void __init pagecache_init(void)
 {
-       DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+       int i;
 
-       if (test_bit(bit_nr, &page->flags))
-               __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
-                                                       TASK_UNINTERRUPTIBLE);
+       for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
+               init_waitqueue_head(&page_wait_table[i]);
+
+       page_writeback_init();
 }
-EXPORT_SYMBOL(wait_on_page_bit);
 
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+struct wait_page_key {
+       struct page *page;
+       int bit_nr;
+       int page_match;
+};
+
+struct wait_page_queue {
+       struct page *page;
+       int bit_nr;
+       wait_queue_t wait;
+};
+
+static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
 {
-       DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+       struct wait_page_key *key = arg;
+       struct wait_page_queue *wait_page
+               = container_of(wait, struct wait_page_queue, wait);
+
+       if (wait_page->page != key->page)
+              return 0;
+       key->page_match = 1;
 
-       if (!test_bit(bit_nr, &page->flags))
+       if (wait_page->bit_nr != key->bit_nr)
+               return 0;
+       if (test_bit(key->bit_nr, &key->page->flags))
                return 0;
 
-       return __wait_on_bit(page_waitqueue(page), &wait,
-                            bit_wait_io, TASK_KILLABLE);
+       return autoremove_wake_function(wait, mode, sync, key);
 }
 
-int wait_on_page_bit_killable_timeout(struct page *page,
-                                      int bit_nr, unsigned long timeout)
+void wake_up_page_bit(struct page *page, int bit_nr)
 {
-       DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+       wait_queue_head_t *q = page_waitqueue(page);
+       struct wait_page_key key;
+       unsigned long flags;
 
-       wait.key.timeout = jiffies + timeout;
-       if (!test_bit(bit_nr, &page->flags))
-               return 0;
-       return __wait_on_bit(page_waitqueue(page), &wait,
-                            bit_wait_io_timeout, TASK_KILLABLE);
+       key.page = page;
+       key.bit_nr = bit_nr;
+       key.page_match = 0;
+
+       spin_lock_irqsave(&q->lock, flags);
+       __wake_up_locked_key(q, TASK_NORMAL, &key);
+       /*
+        * It is possible for other pages to have collided on the waitqueue
+        * hash, so in that case check for a page match. That prevents a long-
+        * term waiter
+        *
+        * It is still possible to miss a case here, when we woke page waiters
+        * and removed them from the waitqueue, but there are still other
+        * page waiters.
+        */
+       if (!waitqueue_active(q) || !key.page_match) {
+               ClearPageWaiters(page);
+               /*
+                * It's possible to miss clearing Waiters here, when we woke
+                * our page waiters, but the hashed waitqueue has waiters for
+                * other pages on it.
+                *
+                * That's okay, it's a rare case. The next waker will clear it.
+                */
+       }
+       spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(wake_up_page_bit);
+
+static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+               struct page *page, int bit_nr, int state, bool lock)
+{
+       struct wait_page_queue wait_page;
+       wait_queue_t *wait = &wait_page.wait;
+       int ret = 0;
+
+       init_wait(wait);
+       wait->func = wake_page_function;
+       wait_page.page = page;
+       wait_page.bit_nr = bit_nr;
+
+       for (;;) {
+               spin_lock_irq(&q->lock);
+
+               if (likely(list_empty(&wait->task_list))) {
+                       if (lock)
+                               __add_wait_queue_tail_exclusive(q, wait);
+                       else
+                               __add_wait_queue(q, wait);
+                       SetPageWaiters(page);
+               }
+
+               set_current_state(state);
+
+               spin_unlock_irq(&q->lock);
+
+               if (likely(test_bit(bit_nr, &page->flags))) {
+                       io_schedule();
+                       if (unlikely(signal_pending_state(state, current))) {
+                               ret = -EINTR;
+                               break;
+                       }
+               }
+
+               if (lock) {
+                       if (!test_and_set_bit_lock(bit_nr, &page->flags))
+                               break;
+               } else {
+                       if (!test_bit(bit_nr, &page->flags))
+                               break;
+               }
+       }
+
+       finish_wait(q, wait);
+
+       /*
+        * A signal could leave PageWaiters set. Clearing it here if
+        * !waitqueue_active would be possible (by open-coding finish_wait),
+        * but still fail to catch it in the case of wait hash collision. We
+        * already can fail to clear wait hash collision cases, so don't
+        * bother with signals either.
+        */
+
+       return ret;
+}
+
+void wait_on_page_bit(struct page *page, int bit_nr)
+{
+       wait_queue_head_t *q = page_waitqueue(page);
+       wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
+}
+EXPORT_SYMBOL(wait_on_page_bit);
+
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+       wait_queue_head_t *q = page_waitqueue(page);
+       return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
 }
-EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
@@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
 
        spin_lock_irqsave(&q->lock, flags);
        __add_wait_queue(q, waiter);
+       SetPageWaiters(page);
        spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
  */
-void __lock_page(struct page *page)
+void __lock_page(struct page *__page)
 {
-       struct page *page_head = compound_head(page);
-       DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-       __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
-                                                       TASK_UNINTERRUPTIBLE);
+       struct page *page = compound_head(__page);
+       wait_queue_head_t *q = page_waitqueue(page);
+       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
 }
 EXPORT_SYMBOL(__lock_page);
 
-int __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *__page)
 {
-       struct page *page_head = compound_head(page);
-       DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-       return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
-                                       bit_wait_io, TASK_KILLABLE);
+       struct page *page = compound_head(__page);
+       wait_queue_head_t *q = page_waitqueue(page);
+       return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
index 44d68895a9b9b29deefbed2be37e52eb94ba9f33..7aa2ea0a8623c2bac9bb4d29889ad163b58a4195 100644 (file)
@@ -36,6 +36,8 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
+void page_writeback_init(void);
+
 int do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
index 19e796d36a629147dd36217ecab34934300dc660..f283c7e0a2a302c617c03a3aebf60e262b94a895 100644 (file)
@@ -764,12 +764,11 @@ static int me_huge_page(struct page *p, unsigned long pfn)
  */
 
 #define dirty          (1UL << PG_dirty)
-#define sc             (1UL << PG_swapcache)
+#define sc             ((1UL << PG_swapcache) | (1UL << PG_swapbacked))
 #define unevict                (1UL << PG_unevictable)
 #define mlock          (1UL << PG_mlocked)
 #define writeback      (1UL << PG_writeback)
 #define lru            (1UL << PG_lru)
-#define swapbacked     (1UL << PG_swapbacked)
 #define head           (1UL << PG_head)
 #define slab           (1UL << PG_slab)
 #define reserved       (1UL << PG_reserved)
@@ -819,7 +818,6 @@ static struct page_state {
 #undef mlock
 #undef writeback
 #undef lru
-#undef swapbacked
 #undef head
 #undef slab
 #undef reserved
index 0ed24b1fa77b89cd49738390cda724c5e74008f8..87f4d0f818194ea68cd323b1bcf84dd430b826a3 100644 (file)
@@ -466,13 +466,15 @@ int migrate_page_move_mapping(struct address_space *mapping,
         */
        newpage->index = page->index;
        newpage->mapping = page->mapping;
-       if (PageSwapBacked(page))
-               __SetPageSwapBacked(newpage);
-
        get_page(newpage);      /* add cache reference */
-       if (PageSwapCache(page)) {
-               SetPageSwapCache(newpage);
-               set_page_private(newpage, page_private(page));
+       if (PageSwapBacked(page)) {
+               __SetPageSwapBacked(newpage);
+               if (PageSwapCache(page)) {
+                       SetPageSwapCache(newpage);
+                       set_page_private(newpage, page_private(page));
+               }
+       } else {
+               VM_BUG_ON_PAGE(PageSwapCache(page), page);
        }
 
        /* Move dirty while page refs frozen and newpage not yet exposed */
index 4dcf852e1e6d8f2e9f0eeca9ee39f620ea972957..844baedd24292f0803ddadb2a2be03a754ec150b 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
                del_page_from_lru_list(page, lruvec, page_off_lru(page));
                spin_unlock_irqrestore(zone_lru_lock(zone), flags);
        }
+       __ClearPageWaiters(page);
        mem_cgroup_uncharge(page);
 }
 
@@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 
                /* Clear Active bit in case of parallel mark_page_accessed */
                __ClearPageActive(page);
+               __ClearPageWaiters(page);
 
                list_add(&page->lru, &pages_to_free);
        }
index 8561e7ddca59e7ce4267f08039a8105df2771645..8792ad8dbf837064adb180afa34ae0e275bdc6e5 100644 (file)
@@ -10,6 +10,7 @@ endif
 turbostat : turbostat.c
 CFLAGS +=      -Wall
 CFLAGS +=      -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+CFLAGS +=      -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
 
 %: %.c
        @mkdir -p $(BUILD_OUTPUT)
index 492e84fbebfa8afd97171f3bd1e86e6e6b6caf49..03cb639b292ecc0e507b1c5190fffc877a9c4ac8 100644 (file)
@@ -25,9 +25,27 @@ Some information is not available on older processors.
 .SS Options
 Options can be specified with a single or double '-', and only as much of the option
 name as necessary to disambiguate it from others is necessary.  Note that options are case-sensitive.
-\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
 .PP
-\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter.
+\fB--add attributes\fP add column with counter having specified 'attributes'.  The 'location' attribute is required, all others are optional.
+.nf
+       location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP}
+               msrDDD is a decimal offset, eg. msr16
+               msr0xXXX is a hex offset, eg. msr0x10
+
+       scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP}
+               sample and print the counter for every cpu, core, or package.
+               default: cpu
+
+       size: {\fBu32\fP | \fBu64\fP }
+               MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits.
+               default: u64
+
+       format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP}
+               'raw' shows the MSR contents in hex.
+               'delta' shows the difference in values during the measurement interval.
+               'percent' shows the delta as a percentage of the cycles elapsed.
+               default: delta
+.fi
 .PP
 \fB--Dump\fP displays the raw counter values.
 .PP
@@ -43,10 +61,6 @@ The file is truncated if it already exists, and it is created if it does not exi
 .PP
 \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts.
 .PP
-\fB--MSR MSR#\fP shows the specified 64-bit MSR value.
-.PP
-\fB--msr MSR#\fP shows the specified 32-bit MSR value.
-.PP
 \fB--Package\fP limits output to the system summary plus the 1st thread in each Package.
 .PP
 \fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package.  Ie. it skips hyper-threaded siblings.
index 3e199b508a96f566159f2589d3112612fe91f36f..f13f61b065c699f2364d8d033d3a4f3a36110533 100644 (file)
@@ -21,6 +21,7 @@
 
 #define _GNU_SOURCE
 #include MSRHEADER
+#include INTEL_FAMILY_HEADER
 #include <stdarg.h>
 #include <stdio.h>
 #include <err.h>
@@ -51,8 +52,6 @@ unsigned int debug;
 unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int dump_only;
-unsigned int skip_c0;
-unsigned int skip_c1;
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
@@ -72,10 +71,6 @@ unsigned int units = 1000000;        /* MHz etc */
 unsigned int genuine_intel;
 unsigned int has_invariant_tsc;
 unsigned int do_nhm_platform_info;
-unsigned int extra_msr_offset32;
-unsigned int extra_msr_offset64;
-unsigned int extra_delta_offset32;
-unsigned int extra_delta_offset64;
 unsigned int aperf_mperf_multiplier = 1;
 int do_irq = 1;
 int do_smi;
@@ -131,9 +126,8 @@ unsigned int has_hwp_pkg;           /* IA32_HWP_REQUEST_PKG */
 #define RAPL_DRAM_POWER_INFO   (1 << 5)
                                        /* 0x61c MSR_DRAM_POWER_INFO */
 
-#define RAPL_CORES             (1 << 6)
+#define RAPL_CORES_POWER_LIMIT (1 << 6)
                                        /* 0x638 MSR_PP0_POWER_LIMIT */
-                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
 #define RAPL_CORE_POLICY       (1 << 7)
                                        /* 0x63a MSR_PP0_POLICY */
 
@@ -141,11 +135,20 @@ unsigned int has_hwp_pkg;         /* IA32_HWP_REQUEST_PKG */
                                        /* 0x640 MSR_PP1_POWER_LIMIT */
                                        /* 0x641 MSR_PP1_ENERGY_STATUS */
                                        /* 0x642 MSR_PP1_POLICY */
+
+#define RAPL_CORES_ENERGY_STATUS       (1 << 9)
+                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
+#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
 #define        TJMAX_DEFAULT   100
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
-int aperf_mperf_unstable;
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
+ */
+#define        NAME_BYTES 20
+
 int backwards_count;
 char *progname;
 
@@ -157,16 +160,13 @@ struct thread_data {
        unsigned long long aperf;
        unsigned long long mperf;
        unsigned long long c1;
-       unsigned long long extra_msr64;
-       unsigned long long extra_delta64;
-       unsigned long long extra_msr32;
-       unsigned long long extra_delta32;
        unsigned int irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;
        unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
+       unsigned long long counter[1];
 } *thread_even, *thread_odd;
 
 struct core_data {
@@ -175,6 +175,7 @@ struct core_data {
        unsigned long long c7;
        unsigned int core_temp_c;
        unsigned int core_id;
+       unsigned long long counter[1];
 } *core_even, *core_odd;
 
 struct pkg_data {
@@ -199,7 +200,7 @@ struct pkg_data {
        unsigned int rapl_pkg_perf_status;      /* MSR_PKG_PERF_STATUS */
        unsigned int rapl_dram_perf_status;     /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
-
+       unsigned long long counter[1];
 } *package_even, *package_odd;
 
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -213,11 +214,33 @@ struct pkg_data {
        (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
+enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
+enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS};
+enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
+
+struct msr_counter {
+       unsigned int msr_num;
+       char name[NAME_BYTES];
+       unsigned int width;
+       enum counter_type type;
+       enum counter_format format;
+       struct msr_counter *next;
+};
+
+struct sys_counters {
+       unsigned int thread_counter_bytes;
+       unsigned int core_counter_bytes;
+       unsigned int package_counter_bytes;
+       struct msr_counter *tp;
+       struct msr_counter *cp;
+       struct msr_counter *pp;
+} sys;
+
 struct system_summary {
        struct thread_data threads;
        struct core_data cores;
        struct pkg_data packages;
-} sum, average;
+} average;
 
 
 struct topo_params {
@@ -319,120 +342,148 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 /*
  * Example Format w/ field column widths:
  *
- *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     IRQ   SMI   Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp  CoreCnt PkgTmp  GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt
  * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
  */
 
 void print_header(void)
 {
+       struct msr_counter *mp;
+
        if (show_pkg)
-               outp += sprintf(outp, " Package");
+               outp += sprintf(outp, "\tPackage");
        if (show_core)
-               outp += sprintf(outp, "    Core");
+               outp += sprintf(outp, "\tCore");
        if (show_cpu)
-               outp += sprintf(outp, "     CPU");
+               outp += sprintf(outp, "\tCPU");
        if (has_aperf)
-               outp += sprintf(outp, " Avg_MHz");
+               outp += sprintf(outp, "\tAvg_MHz");
        if (has_aperf)
-               outp += sprintf(outp, "   Busy%%");
+               outp += sprintf(outp, "\tBusy%%");
        if (has_aperf)
-               outp += sprintf(outp, " Bzy_MHz");
-       outp += sprintf(outp, " TSC_MHz");
-
-       if (extra_delta_offset32)
-               outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
-       if (extra_delta_offset64)
-               outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
-       if (extra_msr_offset32)
-               outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
-       if (extra_msr_offset64)
-               outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
+               outp += sprintf(outp, "\tBzy_MHz");
+       outp += sprintf(outp, "\tTSC_MHz");
 
        if (!debug)
                goto done;
 
        if (do_irq)
-               outp += sprintf(outp, "     IRQ");
+               outp += sprintf(outp, "\tIRQ");
        if (do_smi)
-               outp += sprintf(outp, "     SMI");
+               outp += sprintf(outp, "\tSMI");
 
        if (do_nhm_cstates)
-               outp += sprintf(outp, "  CPU%%c1");
+               outp += sprintf(outp, "\tCPU%%c1");
        if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
-               outp += sprintf(outp, "  CPU%%c3");
+               outp += sprintf(outp, "\tCPU%%c3");
        if (do_nhm_cstates)
-               outp += sprintf(outp, "  CPU%%c6");
+               outp += sprintf(outp, "\tCPU%%c6");
        if (do_snb_cstates)
-               outp += sprintf(outp, "  CPU%%c7");
+               outp += sprintf(outp, "\tCPU%%c7");
+
+       for (mp = sys.tp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
 
        if (do_dts)
-               outp += sprintf(outp, " CoreTmp");
+               outp += sprintf(outp, "\tCoreTmp");
+
+       for (mp = sys.cp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
+
        if (do_ptm)
-               outp += sprintf(outp, "  PkgTmp");
+               outp += sprintf(outp, "\tPkgTmp");
 
        if (do_gfx_rc6_ms)
-               outp += sprintf(outp, " GFX%%rc6");
+               outp += sprintf(outp, "\tGFX%%rc6");
 
        if (do_gfx_mhz)
-               outp += sprintf(outp, "  GFXMHz");
+               outp += sprintf(outp, "\tGFXMHz");
 
        if (do_skl_residency) {
-               outp += sprintf(outp, " Totl%%C0");
-               outp += sprintf(outp, "  Any%%C0");
-               outp += sprintf(outp, "  GFX%%C0");
-               outp += sprintf(outp, " CPUGFX%%");
+               outp += sprintf(outp, "\tTotl%%C0");
+               outp += sprintf(outp, "\tAny%%C0");
+               outp += sprintf(outp, "\tGFX%%C0");
+               outp += sprintf(outp, "\tCPUGFX%%");
        }
 
        if (do_pc2)
-               outp += sprintf(outp, " Pkg%%pc2");
+               outp += sprintf(outp, "\tPkg%%pc2");
        if (do_pc3)
-               outp += sprintf(outp, " Pkg%%pc3");
+               outp += sprintf(outp, "\tPkg%%pc3");
        if (do_pc6)
-               outp += sprintf(outp, " Pkg%%pc6");
+               outp += sprintf(outp, "\tPkg%%pc6");
        if (do_pc7)
-               outp += sprintf(outp, " Pkg%%pc7");
+               outp += sprintf(outp, "\tPkg%%pc7");
        if (do_c8_c9_c10) {
-               outp += sprintf(outp, " Pkg%%pc8");
-               outp += sprintf(outp, " Pkg%%pc9");
-               outp += sprintf(outp, " Pk%%pc10");
+               outp += sprintf(outp, "\tPkg%%pc8");
+               outp += sprintf(outp, "\tPkg%%pc9");
+               outp += sprintf(outp, "\tPk%%pc10");
        }
 
        if (do_rapl && !rapl_joules) {
                if (do_rapl & RAPL_PKG)
-                       outp += sprintf(outp, " PkgWatt");
-               if (do_rapl & RAPL_CORES)
-                       outp += sprintf(outp, " CorWatt");
+                       outp += sprintf(outp, "\tPkgWatt");
+               if (do_rapl & RAPL_CORES_ENERGY_STATUS)
+                       outp += sprintf(outp, "\tCorWatt");
                if (do_rapl & RAPL_GFX)
-                       outp += sprintf(outp, " GFXWatt");
+                       outp += sprintf(outp, "\tGFXWatt");
                if (do_rapl & RAPL_DRAM)
-                       outp += sprintf(outp, " RAMWatt");
+                       outp += sprintf(outp, "\tRAMWatt");
                if (do_rapl & RAPL_PKG_PERF_STATUS)
-                       outp += sprintf(outp, "   PKG_%%");
+                       outp += sprintf(outp, "\tPKG_%%");
                if (do_rapl & RAPL_DRAM_PERF_STATUS)
-                       outp += sprintf(outp, "   RAM_%%");
+                       outp += sprintf(outp, "\tRAM_%%");
        } else if (do_rapl && rapl_joules) {
                if (do_rapl & RAPL_PKG)
-                       outp += sprintf(outp, "   Pkg_J");
-               if (do_rapl & RAPL_CORES)
-                       outp += sprintf(outp, "   Cor_J");
+                       outp += sprintf(outp, "\tPkg_J");
+               if (do_rapl & RAPL_CORES_ENERGY_STATUS)
+                       outp += sprintf(outp, "\tCor_J");
                if (do_rapl & RAPL_GFX)
-                       outp += sprintf(outp, "   GFX_J");
+                       outp += sprintf(outp, "\tGFX_J");
                if (do_rapl & RAPL_DRAM)
-                       outp += sprintf(outp, "   RAM_J");
+                       outp += sprintf(outp, "\tRAM_J");
                if (do_rapl & RAPL_PKG_PERF_STATUS)
-                       outp += sprintf(outp, "   PKG_%%");
+                       outp += sprintf(outp, "\tPKG_%%");
                if (do_rapl & RAPL_DRAM_PERF_STATUS)
-                       outp += sprintf(outp, "   RAM_%%");
-               outp += sprintf(outp, "   time");
-
+                       outp += sprintf(outp, "\tRAM_%%");
        }
-    done:
+       for (mp = sys.pp; mp; mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 64)
+                               outp += sprintf(outp, "\t%18.18s", mp->name);
+                       else
+                               outp += sprintf(outp, "\t%10.10s", mp->name);
+               } else {
+                       outp += sprintf(outp, "\t%-7.7s", mp->name);
+               }
+       }
+
+done:
        outp += sprintf(outp, "\n");
 }
 
 int dump_counters(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
 
        if (t) {
@@ -442,18 +493,16 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
                outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
                outp += sprintf(outp, "c1: %016llX\n", t->c1);
-               outp += sprintf(outp, "msr0x%x: %08llX\n",
-                       extra_delta_offset32, t->extra_delta32);
-               outp += sprintf(outp, "msr0x%x: %016llX\n",
-                       extra_delta_offset64, t->extra_delta64);
-               outp += sprintf(outp, "msr0x%x: %08llX\n",
-                       extra_msr_offset32, t->extra_msr32);
-               outp += sprintf(outp, "msr0x%x: %016llX\n",
-                       extra_msr_offset64, t->extra_msr64);
+
                if (do_irq)
                        outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
                if (do_smi)
                        outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
+
+               for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, t->counter[i]);
+               }
        }
 
        if (c) {
@@ -462,6 +511,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
                outp += sprintf(outp, "c7: %016llX\n", c->c7);
                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+
+               for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, c->counter[i]);
+               }
        }
 
        if (p) {
@@ -491,6 +545,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
                outp += sprintf(outp, "Throttle RAM: %0X\n",
                        p->rapl_dram_perf_status);
                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
+
+               for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+                       outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
+                               i, mp->msr_num, p->counter[i]);
+               }
        }
 
        outp += sprintf(outp, "\n");
@@ -506,6 +565,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
 {
        double interval_float;
        char *fmt8;
+       int i;
+       struct msr_counter *mp;
 
         /* if showing only 1st thread in core and this isn't one, bail out */
        if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -520,99 +581,103 @@ int format_counters(struct thread_data *t, struct core_data *c,
        /* topo columns, print blanks on 1st (average) line */
        if (t == &average.threads) {
                if (show_pkg)
-                       outp += sprintf(outp, "       -");
+                       outp += sprintf(outp, "\t-");
                if (show_core)
-                       outp += sprintf(outp, "       -");
+                       outp += sprintf(outp, "\t-");
                if (show_cpu)
-                       outp += sprintf(outp, "       -");
+                       outp += sprintf(outp, "\t-");
        } else {
                if (show_pkg) {
                        if (p)
-                               outp += sprintf(outp, "%8d", p->package_id);
+                               outp += sprintf(outp, "\t%d", p->package_id);
                        else
-                               outp += sprintf(outp, "       -");
+                               outp += sprintf(outp, "\t-");
                }
                if (show_core) {
                        if (c)
-                               outp += sprintf(outp, "%8d", c->core_id);
+                               outp += sprintf(outp, "\t%d", c->core_id);
                        else
-                               outp += sprintf(outp, "       -");
+                               outp += sprintf(outp, "\t-");
                }
                if (show_cpu)
-                       outp += sprintf(outp, "%8d", t->cpu_id);
+                       outp += sprintf(outp, "\t%d", t->cpu_id);
        }
 
        /* Avg_MHz */
        if (has_aperf)
-               outp += sprintf(outp, "%8.0f",
+               outp += sprintf(outp, "\t%.0f",
                        1.0 / units * t->aperf / interval_float);
 
        /* Busy% */
-       if (has_aperf) {
-               if (!skip_c0)
-                       outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
-               else
-                       outp += sprintf(outp, "********");
-       }
+       if (has_aperf)
+               outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
 
        /* Bzy_MHz */
        if (has_aperf) {
                if (has_base_hz)
-                       outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf);
+                       outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
                else
-                       outp += sprintf(outp, "%8.0f",
+                       outp += sprintf(outp, "\t%.0f",
                                1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
        }
 
        /* TSC_MHz */
-       outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
-
-       /* delta */
-       if (extra_delta_offset32)
-               outp += sprintf(outp, "  %11llu", t->extra_delta32);
-
-       /* DELTA */
-       if (extra_delta_offset64)
-               outp += sprintf(outp, "  %11llu", t->extra_delta64);
-       /* msr */
-       if (extra_msr_offset32)
-               outp += sprintf(outp, "  0x%08llx", t->extra_msr32);
-
-       /* MSR */
-       if (extra_msr_offset64)
-               outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
+       outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
 
        if (!debug)
                goto done;
 
        /* IRQ */
        if (do_irq)
-               outp += sprintf(outp, "%8d", t->irq_count);
+               outp += sprintf(outp, "\t%d", t->irq_count);
 
        /* SMI */
        if (do_smi)
-               outp += sprintf(outp, "%8d", t->smi_count);
+               outp += sprintf(outp, "\t%d", t->smi_count);
 
-       if (do_nhm_cstates) {
-               if (!skip_c1)
-                       outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
-               else
-                       outp += sprintf(outp, "********");
-       }
+       if (do_nhm_cstates)
+               outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
 
        /* print per-core data only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                goto done;
 
        if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
-               outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
        if (do_nhm_cstates)
-               outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
        if (do_snb_cstates)
-               outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%8lld", t->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc);
+               }
+       }
+
 
        if (do_dts)
-               outp += sprintf(outp, "%8d", c->core_temp_c);
+               outp += sprintf(outp, "\t%d", c->core_temp_c);
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%8lld", c->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc);
+               }
+       }
 
        /* print per-package data only for 1st core in package */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
@@ -620,42 +685,42 @@ int format_counters(struct thread_data *t, struct core_data *c,
 
        /* PkgTmp */
        if (do_ptm)
-               outp += sprintf(outp, "%8d", p->pkg_temp_c);
+               outp += sprintf(outp, "\t%d", p->pkg_temp_c);
 
        /* GFXrc6 */
        if (do_gfx_rc6_ms) {
-               if (p->gfx_rc6_ms == -1) {      /* detect counter reset */
-                       outp += sprintf(outp, "  ***.**");
+               if (p->gfx_rc6_ms == -1) {      /* detect GFX counter reset */
+                       outp += sprintf(outp, "\t**.**");
                } else {
-                       outp += sprintf(outp, "%8.2f",
+                       outp += sprintf(outp, "\t%.2f",
                                p->gfx_rc6_ms / 10.0 / interval_float);
                }
        }
 
        /* GFXMHz */
        if (do_gfx_mhz)
-               outp += sprintf(outp, "%8d", p->gfx_mhz);
+               outp += sprintf(outp, "\t%d", p->gfx_mhz);
 
        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
        if (do_skl_residency) {
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
        }
 
        if (do_pc2)
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
        if (do_pc3)
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
        if (do_pc6)
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
        if (do_pc7)
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
        if (do_c8_c9_c10) {
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc);
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc);
-               outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
+               outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
        }
 
        /*
@@ -663,14 +728,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
         * indicate that results are suspect by printing "**" in fraction place.
         */
        if (interval_float < rapl_joule_counter_range)
-               fmt8 = "%8.2f";
+               fmt8 = "\t%.2f";
        else
-               fmt8 = " %6.0f**";
+               fmt8 = "%6.0f**";
 
        if (do_rapl && !rapl_joules) {
                if (do_rapl & RAPL_PKG)
                        outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
-               if (do_rapl & RAPL_CORES)
+               if (do_rapl & RAPL_CORES_ENERGY_STATUS)
                        outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
                if (do_rapl & RAPL_GFX)
                        outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
@@ -697,9 +762,20 @@ int format_counters(struct thread_data *t, struct core_data *c,
                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
                if (do_rapl & RAPL_DRAM_PERF_STATUS)
                        outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
-
-               outp += sprintf(outp, fmt8, interval_float);
        }
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW) {
+                       if (mp->width == 32)
+                               outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
+                       else
+                               outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
+               } else if (mp->format == FORMAT_DELTA) {
+                       outp += sprintf(outp, "\t%8lld", p->counter[i]);
+               } else if (mp->format == FORMAT_PERCENT) {
+                       outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc);
+               }
+       }
+
 done:
        outp += sprintf(outp, "\n");
 
@@ -752,9 +828,11 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
                old = 0x100000000 + new - old;  \
        }
 
-void
+int
 delta_package(struct pkg_data *new, struct pkg_data *old)
 {
+       int i;
+       struct msr_counter *mp;
 
        if (do_skl_residency) {
                old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
@@ -788,24 +866,46 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
        DELTA_WRAP32(new->energy_dram, old->energy_dram);
        DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
        DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       old->counter[i] = new->counter[i];
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];
+       }
+
+       return 0;
 }
 
 void
 delta_core(struct core_data *new, struct core_data *old)
 {
+       int i;
+       struct msr_counter *mp;
+
        old->c3 = new->c3 - old->c3;
        old->c6 = new->c6 - old->c6;
        old->c7 = new->c7 - old->c7;
        old->core_temp_c = new->core_temp_c;
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       old->counter[i] = new->counter[i];
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];
+       }
 }
 
 /*
  * old = new - old
  */
-void
+int
 delta_thread(struct thread_data *new, struct thread_data *old,
        struct core_data *core_delta)
 {
+       int i;
+       struct msr_counter *mp;
+
        old->tsc = new->tsc - old->tsc;
 
        /* check for TSC < 1 Mcycles over interval */
@@ -821,20 +921,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                        old->aperf = new->aperf - old->aperf;
                        old->mperf = new->mperf - old->mperf;
                } else {
-
-                       if (!aperf_mperf_unstable) {
-                               fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
-                               fprintf(outf, "* Frequency results do not cover entire interval *\n");
-                               fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
-
-                               aperf_mperf_unstable = 1;
-                       }
-                       /*
-                        * mperf delta is likely a huge "positive" number
-                        * can not use it for calculating c0 time
-                        */
-                       skip_c0 = 1;
-                       skip_c1 = 1;
+                       return -1;
                }
        }
 
@@ -865,52 +952,53 @@ delta_thread(struct thread_data *new, struct thread_data *old,
                old->mperf = 1; /* divide by 0 protection */
        }
 
-       old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
-       old->extra_delta32 &= 0xFFFFFFFF;
-
-       old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
-
-       /*
-        * Extra MSR is just a snapshot, simply copy latest w/o subtracting
-        */
-       old->extra_msr32 = new->extra_msr32;
-       old->extra_msr64 = new->extra_msr64;
-
        if (do_irq)
                old->irq_count = new->irq_count - old->irq_count;
 
        if (do_smi)
                old->smi_count = new->smi_count - old->smi_count;
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       old->counter[i] = new->counter[i];
+               else
+                       old->counter[i] = new->counter[i] - old->counter[i];
+       }
+       return 0;
 }
 
 int delta_cpu(struct thread_data *t, struct core_data *c,
        struct pkg_data *p, struct thread_data *t2,
        struct core_data *c2, struct pkg_data *p2)
 {
+       int retval = 0;
+
        /* calculate core delta only for 1st thread in core */
        if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
                delta_core(c, c2);
 
        /* always calculate thread delta */
-       delta_thread(t, t2, c2);        /* c2 is core delta */
+       retval = delta_thread(t, t2, c2);       /* c2 is core delta */
+       if (retval)
+               return retval;
 
        /* calculate package delta only for 1st core in package */
        if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
-               delta_package(p, p2);
+               retval = delta_package(p, p2);
 
-       return 0;
+       return retval;
 }
 
 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
+       int i;
+       struct msr_counter  *mp;
+
        t->tsc = 0;
        t->aperf = 0;
        t->mperf = 0;
        t->c1 = 0;
 
-       t->extra_delta32 = 0;
-       t->extra_delta64 = 0;
-
        t->irq_count = 0;
        t->smi_count = 0;
 
@@ -948,21 +1036,36 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        p->gfx_rc6_ms = 0;
        p->gfx_mhz = 0;
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
+               t->counter[i] = 0;
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
+               c->counter[i] = 0;
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
+               p->counter[i] = 0;
 }
 int sum_counters(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        average.threads.tsc += t->tsc;
        average.threads.aperf += t->aperf;
        average.threads.mperf += t->mperf;
        average.threads.c1 += t->c1;
 
-       average.threads.extra_delta32 += t->extra_delta32;
-       average.threads.extra_delta64 += t->extra_delta64;
-
        average.threads.irq_count += t->irq_count;
        average.threads.smi_count += t->smi_count;
 
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.threads.counter[i] += t->counter[i];
+       }
+
        /* sum per-core values only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
@@ -973,6 +1076,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 
        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
 
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.cores.counter[i] += c->counter[i];
+       }
+
        /* sum per-pkg values only for 1st core in pkg */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
                return 0;
@@ -1007,6 +1116,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 
        average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
        average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.packages.counter[i] += p->counter[i];
+       }
        return 0;
 }
 /*
@@ -1016,6 +1131,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 void compute_average(struct thread_data *t, struct core_data *c,
        struct pkg_data *p)
 {
+       int i;
+       struct msr_counter *mp;
+
        clear_counters(&average.threads, &average.cores, &average.packages);
 
        for_all_cpus(sum_counters, t, c, p);
@@ -1025,11 +1143,6 @@ void compute_average(struct thread_data *t, struct core_data *c,
        average.threads.mperf /= topo.num_cpus;
        average.threads.c1 /= topo.num_cpus;
 
-       average.threads.extra_delta32 /= topo.num_cpus;
-       average.threads.extra_delta32 &= 0xFFFFFFFF;
-
-       average.threads.extra_delta64 /= topo.num_cpus;
-
        average.cores.c3 /= topo.num_cores;
        average.cores.c6 /= topo.num_cores;
        average.cores.c7 /= topo.num_cores;
@@ -1052,6 +1165,22 @@ void compute_average(struct thread_data *t, struct core_data *c,
        average.packages.pc8 /= topo.num_packages;
        average.packages.pc9 /= topo.num_packages;
        average.packages.pc10 /= topo.num_packages;
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.threads.counter[i] /= topo.num_cpus;
+       }
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.cores.counter[i] /= topo.num_cores;
+       }
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (mp->format == FORMAT_RAW)
+                       continue;
+               average.packages.counter[i] /= topo.num_packages;
+       }
 }
 
 static unsigned long long rdtsc(void)
@@ -1073,6 +1202,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        int cpu = t->cpu_id;
        unsigned long long msr;
        int aperf_mperf_retry_count = 0;
+       struct msr_counter *mp;
+       int i;
 
        if (cpu_migrate(cpu)) {
                fprintf(outf, "Could not migrate to CPU %d\n", cpu);
@@ -1145,31 +1276,18 @@ retry:
                        return -5;
                t->smi_count = msr & 0xFFFFFFFF;
        }
-       if (extra_delta_offset32) {
-               if (get_msr(cpu, extra_delta_offset32, &msr))
-                       return -5;
-               t->extra_delta32 = msr & 0xFFFFFFFF;
-       }
-
-       if (extra_delta_offset64)
-               if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
-                       return -5;
-
-       if (extra_msr_offset32) {
-               if (get_msr(cpu, extra_msr_offset32, &msr))
-                       return -5;
-               t->extra_msr32 = msr & 0xFFFFFFFF;
-       }
-
-       if (extra_msr_offset64)
-               if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
-                       return -5;
 
        if (use_c1_residency_msr) {
                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
                        return -6;
        }
 
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &t->counter[i]))
+                       return -10;
+       }
+
+
        /* collect core counters only for 1st thread in core */
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
@@ -1197,6 +1315,10 @@ retry:
                c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
        }
 
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &c->counter[i]))
+                       return -10;
+       }
 
        /* collect package counters only for 1st core in package */
        if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
@@ -1237,7 +1359,7 @@ retry:
                        return -13;
                p->energy_pkg = msr & 0xFFFFFFFF;
        }
-       if (do_rapl & RAPL_CORES) {
+       if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
                if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
                        return -14;
                p->energy_cores = msr & 0xFFFFFFFF;
@@ -1274,6 +1396,11 @@ retry:
        if (do_gfx_mhz)
                p->gfx_mhz = gfx_cur_mhz;
 
+       for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+               if (get_msr(cpu, mp->msr_num, &p->counter[i]))
+                       return -10;
+       }
+
        return 0;
 }
 
@@ -1310,6 +1437,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV,
 int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
 
 static void
@@ -1638,7 +1766,7 @@ void free_fd_percpu(void)
 {
        int i;
 
-       for (i = 0; i < topo.max_cpu_num; ++i) {
+       for (i = 0; i < topo.max_cpu_num + 1; ++i) {
                if (fd_percpu[i] != 0)
                        close(fd_percpu[i]);
        }
@@ -2071,7 +2199,10 @@ restart:
                }
                gettimeofday(&tv_odd, (struct timezone *)NULL);
                timersub(&tv_odd, &tv_even, &tv_delta);
-               for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
+               if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
+                       re_initialize();
+                       goto restart;
+               }
                compute_average(EVEN_COUNTERS);
                format_all_counters(EVEN_COUNTERS);
                flush_output_stdout();
@@ -2087,7 +2218,10 @@ restart:
                }
                gettimeofday(&tv_even, (struct timezone *)NULL);
                timersub(&tv_even, &tv_odd, &tv_delta);
-               for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
+               if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
+                       re_initialize();
+                       goto restart;
+               }
                compute_average(ODD_COUNTERS);
                format_all_counters(ODD_COUNTERS);
                flush_output_stdout();
@@ -2174,47 +2308,51 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        bclk = discover_bclk(family, model);
 
        switch (model) {
-       case 0x1A:      /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
-       case 0x1E:      /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+       case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainestown NHM-EP */
+       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
        case 0x1F:      /* Core i7 and i5 Processor - Nehalem */
-       case 0x25:      /* Westmere Client - Clarkdale, Arrandale */
-       case 0x2C:      /* Westmere EP - Gulftown */
-       case 0x2E:      /* Nehalem-EX Xeon - Beckton */
-       case 0x2F:      /* Westmere-EX Xeon - Eagleton */
+       case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
+       case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
+       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
+       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
                pkg_cstate_limits = nhm_pkg_cstate_limits;
                break;
-       case 0x2A:      /* SNB */
-       case 0x2D:      /* SNB Xeon */
-       case 0x3A:      /* IVB */
-       case 0x3E:      /* IVB Xeon */
+       case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
+       case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
                pkg_cstate_limits = snb_pkg_cstate_limits;
                break;
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSX */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                pkg_cstate_limits = hsw_pkg_cstate_limits;
                break;
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+               pkg_cstate_limits = skx_pkg_cstate_limits;
+               break;
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
                pkg_cstate_limits = slv_pkg_cstate_limits;
                break;
-       case 0x4C:      /* AMT */
+       case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
                pkg_cstate_limits = amt_pkg_cstate_limits;
                break;
-       case 0x57:      /* PHI */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
+       case INTEL_FAM6_XEON_PHI_KNM:
                pkg_cstate_limits = phi_pkg_cstate_limits;
                break;
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                pkg_cstate_limits = bxt_pkg_cstate_limits;
                break;
        default:
@@ -2234,9 +2372,10 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
        switch (model) {
        /* Nehalem compatible, but do not include turbo-ratio limit support */
-       case 0x2E:      /* Nehalem-EX Xeon - Beckton */
-       case 0x2F:      /* Westmere-EX Xeon - Eagleton */
-       case 0x57:      /* PHI - Knights Landing (different MSR definition) */
+       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
+       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 0;
        default:
                return 1;
@@ -2251,8 +2390,8 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3E:      /* IVB Xeon */
-       case 0x3F:      /* HSW Xeon */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
+       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
                return 1;
        default:
                return 0;
@@ -2267,7 +2406,7 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3F:      /* HSW Xeon */
+       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
                return 1;
        default:
                return 0;
@@ -2283,7 +2422,8 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x57:      /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        default:
                return 0;
@@ -2298,22 +2438,23 @@ int has_config_tdp(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x3A:      /* IVB */
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSX */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
-
-       case 0x57:      /* Knights Landing */
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+
+       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        default:
                return 0;
@@ -2593,8 +2734,8 @@ double get_tdp(unsigned int model)
                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
        switch (model) {
-       case 0x37:
-       case 0x4D:
+       case INTEL_FAM6_ATOM_SILVERMONT1:
+       case INTEL_FAM6_ATOM_SILVERMONT2:
                return 30.0;
        default:
                return 135.0;
@@ -2611,10 +2752,11 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
        /* only called for genuine_intel, family 6 */
 
        switch (model) {
-       case 0x3F:      /* HSX */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
                return (rapl_energy_units);
@@ -2640,38 +2782,42 @@ void rapl_probe(unsigned int family, unsigned int model)
                return;
 
        switch (model) {
-       case 0x2A:
-       case 0x3A:
-       case 0x3C:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_IVYBRIDGE:
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
                break;
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
                do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
                break;
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
                break;
-       case 0x3F:      /* HSX */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x55:      /* SKX */
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
                break;
-       case 0x2D:
-       case 0x3E:
+       case INTEL_FAM6_SANDYBRIDGE_X:
+       case INTEL_FAM6_IVYBRIDGE_X:
                do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
                break;
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
-               do_rapl = RAPL_PKG | RAPL_CORES ;
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
+               do_rapl = RAPL_PKG | RAPL_CORES;
+               break;
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
+               do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
                break;
        default:
                return;
@@ -2682,7 +2828,7 @@ void rapl_probe(unsigned int family, unsigned int model)
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       if (model == 0x37)
+       if (model == INTEL_FAM6_ATOM_SILVERMONT1)
                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
        else
                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
@@ -2713,11 +2859,11 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
                return;
 
        switch (model) {
-       case 0x3C:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
                do_gfx_perf_limit_reasons = 1;
-       case 0x3F:      /* HSX */
+       case INTEL_FAM6_HASWELL_X:      /* HSX */
                do_core_perf_limit_reasons = 1;
                do_ring_perf_limit_reasons = 1;
        default:
@@ -2737,7 +2883,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        cpu = t->cpu_id;
 
        /* DTS is per-core, no need to print for each thread */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 
+       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -2886,9 +3032,8 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
                }
        }
-       if (do_rapl & RAPL_CORES) {
+       if (do_rapl & RAPL_CORES_POWER_LIMIT) {
                if (debug) {
-
                        if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
                                return -9;
                        fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -2927,24 +3072,25 @@ int has_snb_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x2A:
-       case 0x2D:
-       case 0x3A:      /* IVB */
-       case 0x3E:      /* IVB Xeon */
-       case 0x3C:      /* HSW */
-       case 0x3F:      /* HSW */
-       case 0x45:      /* HSW */
-       case 0x46:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x47:      /* BDW */
-       case 0x4F:      /* BDX */
-       case 0x56:      /* BDX-DE */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x55:      /* SKX */
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_SANDYBRIDGE:
+       case INTEL_FAM6_SANDYBRIDGE_X:
+       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
+       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
+       case INTEL_FAM6_HASWELL_CORE:   /* HSW */
+       case INTEL_FAM6_HASWELL_X:      /* HSW */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_HASWELL_GT3E:   /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+       case INTEL_FAM6_BROADWELL_X:    /* BDX */
+       case INTEL_FAM6_BROADWELL_XEON_D:       /* BDX-DE */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
+       case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                return 1;
        }
        return 0;
@@ -2968,13 +3114,13 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x45:      /* HSW */
-       case 0x3D:      /* BDW */
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
-       case 0x5C:      /* BXT */
+       case INTEL_FAM6_HASWELL_ULT:    /* HSW */
+       case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
+       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
                return 1;
        }
        return 0;
@@ -2994,10 +3140,10 @@ int has_skl_msrs(unsigned int family, unsigned int model)
                return 0;
 
        switch (model) {
-       case 0x4E:      /* SKL */
-       case 0x5E:      /* SKL */
-       case 0x8E:      /* KBL */
-       case 0x9E:      /* KBL */
+       case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+       case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+       case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+       case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                return 1;
        }
        return 0;
@@ -3010,8 +3156,8 @@ int is_slm(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
        switch (model) {
-       case 0x37:      /* BYT */
-       case 0x4D:      /* AVN */
+       case INTEL_FAM6_ATOM_SILVERMONT1:       /* BYT */
+       case INTEL_FAM6_ATOM_SILVERMONT2:       /* AVN */
                return 1;
        }
        return 0;
@@ -3022,7 +3168,8 @@ int is_knl(unsigned int family, unsigned int model)
        if (!genuine_intel)
                return 0;
        switch (model) {
-       case 0x57:      /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
+       case INTEL_FAM6_XEON_PHI_KNM:
                return 1;
        }
        return 0;
@@ -3050,7 +3197,7 @@ double slm_bclk(void)
        i = msr & 0xf;
        if (i >= SLM_BCLK_FREQS) {
                fprintf(outf, "SLM BCLK[%d] invalid\n", i);
-               msr = 3;
+               i = 3;
        }
        freq = slm_freq_table[i];
 
@@ -3174,10 +3321,11 @@ void decode_misc_pwr_mgmt_msr(void)
                return;
 
        if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
-               fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
+               fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
                        base_cpu, msr,
                        msr & (1 << 0) ? "DIS" : "EN",
-                       msr & (1 << 1) ? "EN" : "DIS");
+                       msr & (1 << 1) ? "EN" : "DIS",
+                       msr & (1 << 8) ? "EN" : "DIS");
 }
 
 void process_cpuid()
@@ -3303,16 +3451,17 @@ void process_cpuid()
 
                        if (crystal_hz == 0)
                                switch(model) {
-                               case 0x4E:      /* SKL */
-                               case 0x5E:      /* SKL */
-                               case 0x8E:      /* KBL */
-                               case 0x9E:      /* KBL */
+                               case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+                               case INTEL_FAM6_SKYLAKE_DESKTOP:        /* SKL */
+                               case INTEL_FAM6_KABYLAKE_MOBILE:        /* KBL */
+                               case INTEL_FAM6_KABYLAKE_DESKTOP:       /* KBL */
                                        crystal_hz = 24000000;  /* 24.0 MHz */
                                        break;
-                               case 0x55:      /* SKX */
+                               case INTEL_FAM6_SKYLAKE_X:      /* SKX */
+                               case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
                                        crystal_hz = 25000000;  /* 25.0 MHz */
                                        break;
-                               case 0x5C:      /* BXT */
+                               case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
                                        crystal_hz = 19200000;  /* 19.2 MHz */
                                        break;
                                default:
@@ -3385,14 +3534,12 @@ void help()
        "when COMMAND completes.\n"
        "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
        "to print statistics, until interrupted.\n"
+       "--add          add a counter\n"
+       "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
        "--debug        run in \"debug\" mode\n"
        "--interval sec Override default 5-second measurement interval\n"
        "--help         print this help message\n"
-       "--counter msr  print 32-bit counter at address \"msr\"\n"
-       "--Counter msr  print 64-bit Counter at address \"msr\"\n"
        "--out file     create or truncate \"file\" for all output\n"
-       "--msr msr      print 32-bit value at address \"msr\"\n"
-       "--MSR msr      print 64-bit Value at address \"msr\"\n"
        "--version      print version information\n"
        "\n"
        "For more help, run \"man turbostat\"\n");
@@ -3515,7 +3662,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
        int i;
 
        *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
-               topo.num_packages, sizeof(struct thread_data));
+               topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes);
        if (*t == NULL)
                goto error;
 
@@ -3524,14 +3671,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
                (*t)[i].cpu_id = -1;
 
        *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
-               sizeof(struct core_data));
+               sizeof(struct core_data) + sys.core_counter_bytes);
        if (*c == NULL)
                goto error;
 
        for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
                (*c)[i].core_id = -1;
 
-       *p = calloc(topo.num_packages, sizeof(struct pkg_data));
+       *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes);
        if (*p == NULL)
                goto error;
 
@@ -3598,7 +3745,7 @@ void allocate_output_buffer()
 }
 void allocate_fd_percpu(void)
 {
-       fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
+       fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
        if (fd_percpu == NULL)
                err(-1, "calloc fd_percpu");
 }
@@ -3608,9 +3755,9 @@ void allocate_irq_buffers(void)
        if (irq_column_2_cpu == NULL)
                err(-1, "calloc %d", topo.num_cpus);
 
-       irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
+       irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
        if (irqs_per_cpu == NULL)
-               err(-1, "calloc %d", topo.max_cpu_num);
+               err(-1, "calloc %d", topo.max_cpu_num + 1);
 }
 void setup_all_buffers(void)
 {
@@ -3697,9 +3844,12 @@ int fork_it(char **argv)
        for_all_cpus(get_counters, ODD_COUNTERS);
        gettimeofday(&tv_odd, (struct timezone *)NULL);
        timersub(&tv_odd, &tv_even, &tv_delta);
-       for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
-       compute_average(EVEN_COUNTERS);
-       format_all_counters(EVEN_COUNTERS);
+       if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
+               fprintf(outf, "%s: Counter reset detected\n", progname);
+       else {
+               compute_average(EVEN_COUNTERS);
+               format_all_counters(EVEN_COUNTERS);
+       }
 
        fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
 
@@ -3726,24 +3876,170 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 4.12 5 Apr 2016"
+       fprintf(outf, "turbostat version 4.16 24 Dec 2016"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
+int add_counter(unsigned int msr_num, char *name, unsigned int width,
+       enum counter_scope scope, enum counter_type type,
+       enum counter_format format)
+{
+       struct msr_counter *msrp;
+
+       msrp = calloc(1, sizeof(struct msr_counter));
+       if (msrp == NULL) {
+               perror("calloc");
+               exit(1);
+       }
+
+       msrp->msr_num = msr_num;
+       strncpy(msrp->name, name, NAME_BYTES);
+       msrp->width = width;
+       msrp->type = type;
+       msrp->format = format;
+
+       switch (scope) {
+
+       case SCOPE_CPU:
+               sys.thread_counter_bytes += 64;
+               msrp->next = sys.tp;
+               sys.tp = msrp;
+               sys.thread_counter_bytes += sizeof(unsigned long long);
+               break;
+
+       case SCOPE_CORE:
+               sys.core_counter_bytes += 64;
+               msrp->next = sys.cp;
+               sys.cp = msrp;
+               sys.core_counter_bytes += sizeof(unsigned long long);
+               break;
+
+       case SCOPE_PACKAGE:
+               sys.package_counter_bytes += 64;
+               msrp->next = sys.pp;
+               sys.pp = msrp;
+               sys.package_counter_bytes += sizeof(unsigned long long);
+               break;
+       }
+
+       return 0;
+}
+
+void parse_add_command(char *add_command)
+{
+       int msr_num = 0;
+       char name_buffer[NAME_BYTES];
+       int width = 64;
+       int fail = 0;
+       enum counter_scope scope = SCOPE_CPU;
+       enum counter_type type = COUNTER_CYCLES;
+       enum counter_format format = FORMAT_DELTA;
+
+       while (add_command) {
+
+               if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
+                       goto next;
+
+               if (sscanf(add_command, "msr%d", &msr_num) == 1)
+                       goto next;
+
+               if (sscanf(add_command, "u%d", &width) == 1) {
+                       if ((width == 32) || (width == 64))
+                               goto next;
+                       width = 64;
+               }
+               if (!strncmp(add_command, "cpu", strlen("cpu"))) {
+                       scope = SCOPE_CPU;
+                       goto next;
+               }
+               if (!strncmp(add_command, "core", strlen("core"))) {
+                       scope = SCOPE_CORE;
+                       goto next;
+               }
+               if (!strncmp(add_command, "package", strlen("package"))) {
+                       scope = SCOPE_PACKAGE;
+                       goto next;
+               }
+               if (!strncmp(add_command, "cycles", strlen("cycles"))) {
+                       type = COUNTER_CYCLES;
+                       goto next;
+               }
+               if (!strncmp(add_command, "seconds", strlen("seconds"))) {
+                       type = COUNTER_SECONDS;
+                       goto next;
+               }
+               if (!strncmp(add_command, "raw", strlen("raw"))) {
+                       format = FORMAT_RAW;
+                       goto next;
+               }
+               if (!strncmp(add_command, "delta", strlen("delta"))) {
+                       format = FORMAT_DELTA;
+                       goto next;
+               }
+               if (!strncmp(add_command, "percent", strlen("percent"))) {
+                       format = FORMAT_PERCENT;
+                       goto next;
+               }
+
+               if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {        /* 18 < NAME_BYTES */
+                       char *eos;
+
+                       eos = strchr(name_buffer, ',');
+                       if (eos)
+                               *eos = '\0';
+                       goto next;
+               }
+
+next:
+               add_command = strchr(add_command, ',');
+               if (add_command)
+                       add_command++;
+
+       }
+       if (msr_num == 0) {
+               fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
+               fail++;
+       }
+
+       /* generate default column header */
+       if (*name_buffer == '\0') {
+               if (format == FORMAT_RAW) {
+                       if (width == 32)
+                               sprintf(name_buffer, "msr%d", msr_num);
+                       else
+                               sprintf(name_buffer, "MSR%d", msr_num);
+               } else if (format == FORMAT_DELTA) {
+                       if (width == 32)
+                               sprintf(name_buffer, "cnt%d", msr_num);
+                       else
+                               sprintf(name_buffer, "CNT%d", msr_num);
+               } else if (format == FORMAT_PERCENT) {
+                       if (width == 32)
+                               sprintf(name_buffer, "msr%d%%", msr_num);
+                       else
+                               sprintf(name_buffer, "MSR%d%%", msr_num);
+               }
+       }
+
+       if (add_counter(msr_num, name_buffer, width, scope, type, format))
+               fail++;
+
+       if (fail) {
+               help();
+               exit(1);
+       }
+}
 void cmdline(int argc, char **argv)
 {
        int opt;
        int option_index = 0;
        static struct option long_options[] = {
-               {"Counter",     required_argument,      0, 'C'},
-               {"counter",     required_argument,      0, 'c'},
+               {"add",         required_argument,      0, 'a'},
                {"Dump",        no_argument,            0, 'D'},
                {"debug",       no_argument,            0, 'd'},
                {"interval",    required_argument,      0, 'i'},
                {"help",        no_argument,            0, 'h'},
                {"Joules",      no_argument,            0, 'J'},
-               {"MSR",         required_argument,      0, 'M'},
-               {"msr",         required_argument,      0, 'm'},
                {"out",         required_argument,      0, 'o'},
                {"Package",     no_argument,            0, 'p'},
                {"processor",   no_argument,            0, 'p'},
@@ -3758,11 +4054,8 @@ void cmdline(int argc, char **argv)
        while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
                                long_options, &option_index)) != -1) {
                switch (opt) {
-               case 'C':
-                       sscanf(optarg, "%x", &extra_delta_offset64);
-                       break;
-               case 'c':
-                       sscanf(optarg, "%x", &extra_delta_offset32);
+               case 'a':
+                       parse_add_command(optarg);
                        break;
                case 'D':
                        dump_only++;
@@ -3791,12 +4084,6 @@ void cmdline(int argc, char **argv)
                case 'J':
                        rapl_joules++;
                        break;
-               case 'M':
-                       sscanf(optarg, "%x", &extra_msr_offset64);
-                       break;
-               case 'm':
-                       sscanf(optarg, "%x", &extra_msr_offset32);
-                       break;
                case 'o':
                        outf = fopen_or_die(optarg, "w");
                        break;
index 97b657adb3bdd870543fd61c4429f00039ac8f51..a2dbbccbb6a3fe96751fafde8cd01658aa1301a5 100644 (file)
@@ -456,7 +456,7 @@ int kvm_timer_hyp_init(void)
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
 
        cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
-                         "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu,
+                         "kvm/arm/timer:starting", kvm_timer_starting_cpu,
                          kvm_timer_dying_cpu);
        return err;
 }
index 8cebfbc19e90ef053ec325f7d7d75ab7e626a049..5114391b7e5af52ee5f815baead6b4561243a31a 100644 (file)
@@ -428,7 +428,7 @@ int kvm_vgic_hyp_init(void)
        }
 
        ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
-                               "AP_KVM_ARM_VGIC_INIT_STARTING",
+                               "kvm/arm/vgic:starting",
                                vgic_init_cpu_starting, vgic_init_cpu_dying);
        if (ret) {
                kvm_err("Cannot register vgic CPU notifier\n");
index 994f81f8eecb67fed42c5fba956c163d22640239..482612b4e496f92527d5191a2a8adfa5962e4de8 100644 (file)
@@ -3944,7 +3944,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
                        goto out_free_1;
        }
 
-       r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+       r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
                                      kvm_starting_cpu, kvm_dying_cpu);
        if (r)
                goto out_free_2;