Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Mar 2016 01:43:51 +0000 (18:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Mar 2016 01:43:51 +0000 (18:43 -0700)
Pull RAS updates from Ingo Molnar:
 "Various RAS updates:

   - AMD MCE support updates for future CPUs, fixes and 'SMCA' (Scalable
     MCA) error decoding support (Aravind Gopalakrishnan)

   - x86 memcpy_mcsafe() support, to enable smart(er) hardware error
     recovery in NVDIMM drivers, based on an extension of the x86
     exception handling code.  (Tony Luck)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/sb_edac: Fix computation of channel address
  x86/mm, x86/mce: Add memcpy_mcsafe()
  x86/mce/AMD: Document some functionality
  x86/mce: Clarify comments regarding deferred error
  x86/mce/AMD: Fix logic to obtain block address
  x86/mce/AMD, EDAC: Enable error decoding of Scalable MCA errors
  x86/mce: Move MCx_CONFIG MSR definitions
  x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries
  x86/mm: Expand the exception table logic to allow new handling options
  x86/mce/AMD: Set MCAX Enable bit
  x86/mce/AMD: Carve out threshold block preparation
  x86/mce/AMD: Fix LVT offset configuration for thresholding
  x86/mce/AMD: Reduce number of blocks scanned per bank
  x86/mce/AMD: Do not perform shared bank check for future processors
  x86/mce: Fix order of AMD MCE init function call
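
Several of the x86 entries above ("Expand the exception table logic to allow
new handling options", "Check for faults tagged in EXTABLE_CLASS_FAULT
exception table entries") revolve around one idea: each exception-table entry
carries its own fixup policy, which is what lets a machine-check-safe copy
report an error while ordinary fixups keep their old behaviour.  The
stand-alone C toy below models only that idea; the struct layout, handler
names and return values are illustrative assumptions, not the kernel's actual
implementation.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct ex_entry;
    typedef bool (*ex_handler_t)(const struct ex_entry *e, long *ret);

    struct ex_entry {
            const char   *site;     /* stand-in for the faulting instruction address */
            ex_handler_t  handler;  /* per-entry recovery policy: the new part */
    };

    static bool ex_handler_default(const struct ex_entry *e, long *ret)
    {
            (void)e;
            *ret = 0;               /* classic fixup: caller just sees a failed access */
            return true;
    }

    static bool ex_handler_fault(const struct ex_entry *e, long *ret)
    {
            (void)e;
            *ret = -14;             /* -EFAULT: tell a mcsafe-style copy that it faulted */
            return true;
    }

    static const struct ex_entry extable[] = {
            { "copy_from_user", ex_handler_default },
            { "memcpy_mcsafe",  ex_handler_fault   },
    };

    /* Pretend a fault happened at 'site' and run that entry's own fixup. */
    static long fixup_exception(const char *site)
    {
            long ret = -1;

            for (size_t i = 0; i < sizeof(extable) / sizeof(extable[0]); i++) {
                    if (strcmp(extable[i].site, site) == 0 &&
                        extable[i].handler(&extable[i], &ret))
                            break;
            }
            return ret;
    }

    int main(void)
    {
            printf("copy_from_user fixup -> %ld\n", fixup_exception("copy_from_user"));
            printf("memcpy_mcsafe fixup  -> %ld\n", fixup_exception("memcpy_mcsafe"));
            return 0;
    }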

485 files changed:
Documentation/devicetree/bindings/arm/omap/omap.txt
Documentation/kernel-parameters.txt
Documentation/sysctl/kernel.txt
Documentation/virtual/kvm/mmu.txt
MAINTAINERS
Makefile
arch/arm/boot/dts/armada-xp-axpwifiap.dts
arch/arm/boot/dts/armada-xp-db.dts
arch/arm/boot/dts/armada-xp-gp.dts
arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts
arch/arm/boot/dts/armada-xp-linksys-mamba.dts
arch/arm/boot/dts/armada-xp-matrix.dts
arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
arch/arm/boot/dts/armada-xp-synology-ds414.dts
arch/arm/boot/dts/dra7.dtsi
arch/arm/include/asm/cacheflush.h
arch/arm/kernel/setup.c
arch/arm/mach-lpc32xx/phy3250.c
arch/arm/mach-netx/fb.c
arch/arm/mach-nspire/clcd.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/omap_hwmod.h
arch/arm/plat-samsung/pm-check.c
arch/arm/vdso/vdso.S
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/setup.c
arch/arm64/kernel/sleep.S
arch/arm64/mm/hugetlbpage.c
arch/avr32/kernel/setup.c
arch/c6x/kernel/setup.c
arch/ia64/kernel/efi.c
arch/ia64/kernel/setup.c
arch/m32r/kernel/setup.c
arch/microblaze/kernel/setup.c
arch/mips/Kconfig
arch/mips/boot/compressed/uart-16550.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp.c
arch/parisc/include/asm/cache.h
arch/parisc/include/asm/cacheflush.h
arch/parisc/mm/init.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/mm/mem.c
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/pgalloc.h
arch/s390/kernel/early.c
arch/s390/kernel/head64.S
arch/s390/kernel/setup.c
arch/s390/kvm/kvm-s390.c
arch/score/kernel/setup.c
arch/sh/kernel/setup.c
arch/sparc/kernel/head_64.S
arch/sparc/mm/init_64.c
arch/tile/kernel/setup.c
arch/unicore32/kernel/setup.c
arch/x86/Kbuild
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/entry/vdso/vdso2c.h
arch/x86/events/Makefile [new file with mode: 0644]
arch/x86/events/amd/core.c [new file with mode: 0644]
arch/x86/events/amd/ibs.c [new file with mode: 0644]
arch/x86/events/amd/iommu.c [new file with mode: 0644]
arch/x86/events/amd/iommu.h [new file with mode: 0644]
arch/x86/events/amd/uncore.c [new file with mode: 0644]
arch/x86/events/core.c [new file with mode: 0644]
arch/x86/events/intel/bts.c [new file with mode: 0644]
arch/x86/events/intel/core.c [new file with mode: 0644]
arch/x86/events/intel/cqm.c [new file with mode: 0644]
arch/x86/events/intel/cstate.c [new file with mode: 0644]
arch/x86/events/intel/ds.c [new file with mode: 0644]
arch/x86/events/intel/knc.c [new file with mode: 0644]
arch/x86/events/intel/lbr.c [new file with mode: 0644]
arch/x86/events/intel/p4.c [new file with mode: 0644]
arch/x86/events/intel/p6.c [new file with mode: 0644]
arch/x86/events/intel/pt.c [new file with mode: 0644]
arch/x86/events/intel/pt.h [new file with mode: 0644]
arch/x86/events/intel/rapl.c [new file with mode: 0644]
arch/x86/events/intel/uncore.c [new file with mode: 0644]
arch/x86/events/intel/uncore.h [new file with mode: 0644]
arch/x86/events/intel/uncore_nhmex.c [new file with mode: 0644]
arch/x86/events/intel/uncore_snb.c [new file with mode: 0644]
arch/x86/events/intel/uncore_snbep.c [new file with mode: 0644]
arch/x86/events/msr.c [new file with mode: 0644]
arch/x86/events/perf_event.h [new file with mode: 0644]
arch/x86/include/asm/barrier.h
arch/x86/include/asm/cacheflush.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/fpu/xstate.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/sections.h
arch/x86/include/asm/topology.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs_64.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/hypervisor.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/intel_pt.h [deleted file]
arch/x86/kernel/cpu/mcheck/mce-inject.c
arch/x86/kernel/cpu/mcheck/p5.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/threshold.c
arch/x86/kernel/cpu/mcheck/winchip.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/cpu/mtrr/centaur.c
arch/x86/kernel/cpu/mtrr/cleanup.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/perf_event.c [deleted file]
arch/x86/kernel/cpu/perf_event.h [deleted file]
arch/x86/kernel/cpu/perf_event_amd.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_ibs.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_iommu.c [deleted file]
arch/x86/kernel/cpu/perf_event_amd_iommu.h [deleted file]
arch/x86/kernel/cpu/perf_event_amd_uncore.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_bts.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_cqm.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_cstate.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_ds.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_lbr.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_pt.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_rapl.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore.h [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c [deleted file]
arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c [deleted file]
arch/x86/kernel/cpu/perf_event_knc.c [deleted file]
arch/x86/kernel/cpu/perf_event_msr.c [deleted file]
arch/x86/kernel/cpu/perf_event_p4.c [deleted file]
arch/x86/kernel/cpu/perf_event_p6.c [deleted file]
arch/x86/kernel/cpu/rdrand.c
arch/x86/kernel/cpu/topology.c
arch/x86/kernel/cpu/transmeta.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/crash.c
arch/x86/kernel/e820.c
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/init.c
arch/x86/kernel/ftrace.c
arch/x86/kernel/kgdb.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/nmi.c
arch/x86/kernel/pmem.c
arch/x86/kernel/process.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/test_nx.c
arch/x86/kernel/test_rodata.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/lguest/boot.c
arch/x86/lib/delay.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/pageattr.c
arch/x86/platform/efi/quirks.c
arch/x86/xen/enlighten.c
arch/x86/xen/pmu.c
drivers/acpi/acpi_platform.c
drivers/acpi/acpica/psargs.c
drivers/acpi/apei/einj.c
drivers/base/property.c
drivers/dma/at_xdmac.c
drivers/dma/fsldma.c
drivers/dma/iop-adma.c
drivers/dma/mv_xor.c
drivers/dma/qcom_bam_dma.c
drivers/edac/sb_edac.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/atombios_dp.c
drivers/gpu/drm/drm_gem_cma_helper.c
drivers/gpu/drm/etnaviv/etnaviv_gpu.c
drivers/gpu/drm/i2c/tda998x_drv.c
drivers/gpu/drm/i915/intel_audio.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_i2c.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
drivers/gpu/drm/omapdrm/omap_gem.c
drivers/gpu/drm/radeon/atombios_dp.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/sti/sti_cursor.c
drivers/gpu/drm/sti/sti_gdp.c
drivers/gpu/drm/sti/sti_hqvdp.c
drivers/gpu/drm/tegra/gem.c
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/gpu/host1x/cdma.c
drivers/gpu/host1x/job.c
drivers/gpu/ipu-v3/ipu-common.c
drivers/media/media-device.c
drivers/media/platform/coda/coda-bit.c
drivers/misc/lkdtm.c
drivers/mtd/tests/mtd_nandecctest.c
drivers/nvdimm/e820.c
drivers/parisc/eisa_enumerator.c
drivers/pci/pci.c
drivers/rapidio/rio.c
drivers/s390/block/dasd_diag.c
drivers/sh/superhyway/superhyway.c
drivers/spi/spi-imx.c
drivers/spi/spi-rockchip.c
drivers/target/target_core_tmr.c
drivers/video/fbdev/acornfb.c
drivers/video/fbdev/amba-clcd-versatile.c
drivers/video/fbdev/amba-clcd.c
drivers/video/fbdev/atmel_lcdfb.c
drivers/video/fbdev/ep93xx-fb.c
drivers/video/fbdev/gbefb.c
drivers/video/fbdev/imxfb.c
drivers/video/fbdev/mx3fb.c
drivers/video/fbdev/nuc900fb.c
drivers/video/fbdev/omap/lcdc.c
drivers/video/fbdev/pxa168fb.c
drivers/video/fbdev/pxafb.c
drivers/video/fbdev/s3c-fb.c
drivers/video/fbdev/s3c2410fb.c
drivers/video/fbdev/sa1100fb.c
drivers/xen/balloon.c
fs/dax.c
fs/ext4/move_extent.c
fs/jffs2/dir.c
fs/ncpfs/dir.c
fs/ocfs2/mmap.c
fs/xfs/xfs_log_recover.c
include/asm-generic/qspinlock.h
include/asm-generic/qspinlock_types.h
include/asm-generic/vmlinux.lds.h
include/linux/bio.h
include/linux/cache.h
include/linux/compiler.h
include/linux/dma-mapping.h
include/linux/init.h
include/linux/ioport.h
include/linux/kasan.h
include/linux/list.h
include/linux/lockdep.h
include/linux/mm.h
include/linux/perf_event.h
include/linux/tracepoint.h
include/trace/events/asoc.h
include/uapi/linux/media.h
init/main.c
kernel/debug/kdb/kdb_bp.c
kernel/events/core.c
kernel/futex.c
kernel/kexec_core.c
kernel/kexec_file.c
kernel/locking/lockdep.c
kernel/locking/mcs_spinlock.h
kernel/locking/mutex.c
kernel/locking/qspinlock.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/qspinlock_stat.h
kernel/memremap.c
kernel/resource.c
kernel/sched/core.c
kernel/smp.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_syscalls.c
lib/cpumask.c
lib/list_debug.c
lib/test_static_keys.c
mm/filemap.c
mm/hugetlb.c
mm/kasan/kasan.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mempool.c
scripts/ld-version.sh
sound/arm/pxa2xx-pcm-lib.c
sound/soc/codecs/ab8500-codec.c
sound/soc/codecs/adau17x1.h
sound/soc/codecs/cs42l51.c
sound/soc/codecs/da732x.c
sound/soc/codecs/max98088.c
sound/soc/codecs/max98095.c
sound/soc/codecs/tlv320dac33.c
sound/soc/codecs/wl1273.c
sound/soc/codecs/wm8753.c
sound/soc/codecs/wm8904.c
sound/soc/codecs/wm8958-dsp2.c
sound/soc/codecs/wm8983.c
sound/soc/codecs/wm8985.c
sound/soc/codecs/wm8994.c
sound/soc/codecs/wm8996.c
sound/soc/codecs/wm9081.c
sound/soc/codecs/wm9713.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_ssi.c
sound/soc/fsl/imx-pcm-fiq.c
sound/soc/intel/boards/cht_bsw_rt5645.c
sound/soc/intel/boards/mfld_machine.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/nuc900/nuc900-pcm.c
sound/soc/omap/n810.c
sound/soc/omap/omap-pcm.c
sound/soc/omap/rx51.c
sound/soc/pxa/corgi.c
sound/soc/pxa/magician.c
sound/soc/pxa/poodle.c
sound/soc/pxa/spitz.c
sound/soc/pxa/tosa.c
sound/soc/qcom/lpass-cpu.c
sound/soc/samsung/i2s.c
sound/soc/soc-dapm.c
tools/build/Makefile.build
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-all.c
tools/build/feature/test-compile.c
tools/build/feature/test-libcrypto.c [new file with mode: 0644]
tools/lib/api/Build
tools/lib/api/Makefile
tools/lib/api/debug-internal.h [new file with mode: 0644]
tools/lib/api/debug.c [new file with mode: 0644]
tools/lib/api/debug.h [new file with mode: 0644]
tools/lib/api/fs/fs.c
tools/lib/api/fs/fs.h
tools/lib/bpf/libbpf.c
tools/lib/lockdep/Makefile
tools/lib/lockdep/common.c
tools/lib/lockdep/include/liblockdep/common.h
tools/lib/lockdep/lockdep.c
tools/lib/lockdep/preload.c
tools/lib/lockdep/tests/AA.c
tools/lib/lockdep/tests/ABA.c [new file with mode: 0644]
tools/lib/lockdep/tests/ABBA_2threads.c [new file with mode: 0644]
tools/lib/lockdep/uinclude/linux/compiler.h
tools/lib/traceevent/event-parse.c
tools/lib/traceevent/event-parse.h
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-inject.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perfconfig.example
tools/perf/Documentation/tips.txt
tools/perf/Makefile
tools/perf/Makefile.perf
tools/perf/arch/arm/Makefile
tools/perf/arch/arm64/Makefile
tools/perf/arch/powerpc/Makefile
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/book3s_hcalls.h [new file with mode: 0644]
tools/perf/arch/powerpc/util/book3s_hv_exits.h [new file with mode: 0644]
tools/perf/arch/powerpc/util/kvm-stat.c [new file with mode: 0644]
tools/perf/arch/s390/util/kvm-stat.c
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/tests/rdpmc.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/kvm-stat.c
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-cache.c
tools/perf/builtin-config.c
tools/perf/builtin-diff.c
tools/perf/builtin-help.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/config/Makefile
tools/perf/jvmti/Makefile [new file with mode: 0644]
tools/perf/jvmti/jvmti_agent.c [new file with mode: 0644]
tools/perf/jvmti/jvmti_agent.h [new file with mode: 0644]
tools/perf/jvmti/libjvmti.c [new file with mode: 0644]
tools/perf/perf.c
tools/perf/perf.h
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/tests/.gitignore
tools/perf/tests/Build
tools/perf/tests/bp_signal.c
tools/perf/tests/bpf-script-test-relocation.c [new file with mode: 0644]
tools/perf/tests/bpf.c
tools/perf/tests/code-reading.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_output.c
tools/perf/tests/llvm.c
tools/perf/tests/llvm.h
tools/perf/tests/make
tools/perf/tests/parse-events.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browser.c
tools/perf/ui/browser.h
tools/perf/ui/browsers/annotate.c
tools/perf/ui/browsers/hists.c
tools/perf/ui/gtk/hists.c
tools/perf/ui/hist.c
tools/perf/ui/stdio/hist.c
tools/perf/util/Build
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-loader.c
tools/perf/util/bpf-loader.h
tools/perf/util/build-id.c
tools/perf/util/build-id.h
tools/perf/util/cache.h
tools/perf/util/callchain.c
tools/perf/util/color.c
tools/perf/util/config.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/ctype.c
tools/perf/util/data-convert-bt.c
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/demangle-java.c [new file with mode: 0644]
tools/perf/util/demangle-java.h [new file with mode: 0644]
tools/perf/util/dso.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/genelf.c [new file with mode: 0644]
tools/perf/util/genelf.h [new file with mode: 0644]
tools/perf/util/genelf_debug.c [new file with mode: 0644]
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/help-unknown-cmd.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/jit.h [new file with mode: 0644]
tools/perf/util/jitdump.c [new file with mode: 0644]
tools/perf/util/jitdump.h [new file with mode: 0644]
tools/perf/util/kvm-stat.h
tools/perf/util/machine.h
tools/perf/util/mem-events.c [new file with mode: 0644]
tools/perf/util/mem-events.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/parse-events.y
tools/perf/util/pmu.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-shadow.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/strbuf.c
tools/perf/util/strbuf.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/trace-event.c
tools/perf/util/tsc.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/power/x86/turbostat/turbostat.c
virt/kvm/kvm_main.c

index a2bd593881cab361fa739d9a12e63ee7ba63ef50..66422d6631841d9e6eba9be63e453176ebd43f2e 100644 (file)
@@ -23,6 +23,7 @@ Optional properties:
   during suspend.
 - ti,no-reset-on-init: When present, the module should not be reset at init
 - ti,no-idle-on-init: When present, the module should not be idled at init
+- ti,no-idle: When present, the module is never allowed to idle.
 
 Example:
 
index 9a53c929f017d16527270bc2244352edf1d34cd8..000336733a6a99f5b084fc60634738a3376d259a 100644 (file)
@@ -3491,6 +3491,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
        ro              [KNL] Mount root device read-only on boot
 
+       rodata=         [KNL]
+               on      Mark read-only kernel memory as read-only (default).
+               off     Leave read-only kernel memory writable for debugging.
+
        root=           [KNL] Root filesystem
                        See name_to_dev_t comment in init/do_mounts.c.
 
index a93b414672a71ac6fa9bac1e848215804bde139c..f886fbb1ad05d78ecda7c8a3289528ce9613665a 100644 (file)
@@ -58,6 +58,8 @@ show up in /proc/sys/kernel:
 - panic_on_stackoverflow
 - panic_on_unrecovered_nmi
 - panic_on_warn
+- perf_cpu_time_max_percent
+- perf_event_paranoid
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -639,6 +641,17 @@ allowed to execute.
 
 ==============================================================
 
+perf_event_paranoid:
+
+Controls use of the performance events system by unprivileged
+users (without CAP_SYS_ADMIN).  The default value is 1.
+
+ -1: Allow use of (almost) all events by all users
+>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
+>=1: Disallow CPU event access by users without CAP_SYS_ADMIN
+>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN
+
+==============================================================
 
 pid_max:
 
index daf9c0f742d22e882637391539e90ea31eb788f3..c81731096a4338bcec4d43b8049a26118d10f260 100644 (file)
@@ -358,7 +358,8 @@ In the first case there are two additional complications:
 - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
   the kernel may now execute it.  We handle this by also setting spte.nx.
   If we get a user fetch or read fault, we'll change spte.u=1 and
-  spte.nx=gpte.nx back.
+  spte.nx=gpte.nx back.  For this to work, KVM forces EFER.NX to 1 when
+  shadow paging is in use.
 - if CR4.SMAP is disabled: since the page has been changed to a kernel
   page, it can not be reused when CR4.SMAP is enabled. We set
   CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
index 4029c63d8a7d4dd68bebf7f923755a9904411ae3..2061ea77667c36260a72f92ba12c3ecdb625d3c6 100644 (file)
@@ -7399,6 +7399,17 @@ W:       https://www.myricom.com/support/downloads/myri10ge.html
 S:     Supported
 F:     drivers/net/ethernet/myricom/myri10ge/
 
+NAND FLASH SUBSYSTEM
+M:     Boris Brezillon <boris.brezillon@free-electrons.com>
+R:     Richard Weinberger <richard@nod.at>
+L:     linux-mtd@lists.infradead.org
+W:     http://www.linux-mtd.infradead.org/
+Q:     http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:     git git://github.com/linux-nand/linux.git
+S:     Maintained
+F:     drivers/mtd/nand/
+F:     include/linux/mtd/nand*.h
+
 NATSEMI ETHERNET DRIVER (DP8381x)
 S:     Orphan
 F:     drivers/net/ethernet/natsemi/natsemi.c
@@ -8464,6 +8475,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <peterz@infradead.org>
 M:     Ingo Molnar <mingo@redhat.com>
 M:     Arnaldo Carvalho de Melo <acme@kernel.org>
+R:     Alexander Shishkin <alexander.shishkin@linux.intel.com>
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
 S:     Supported
index 2d519d2fb3a9be1a64100c85141b824a37924602..7b3ecdcdc6c1815ca6241a72b1967593a0335b41 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
index 23fc670c0427710168ce41ef012f9e37b8218eb6..5c21b236721fcb9d6e2b5055cbc89ae9527df7f1 100644 (file)
@@ -70,8 +70,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                pcie-controller {
                        status = "okay";
index f774101416a5522841c475252d0a86842e475b29..ebe1d267406df5ab30e3a3189b669733eb8fcaa4 100644 (file)
@@ -76,8 +76,8 @@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                devbus-bootcs {
                        status = "okay";
index 4878d7353069fc2720e15d76af307618daced15c..5730b875c4f51a1aa2743d8881b18d6dbc0b27cd 100644 (file)
@@ -95,8 +95,8 @@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
                          MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                devbus-bootcs {
                        status = "okay";
index fb9e1bbf23385b85b0b82ddb153b922b2d2e0178..8af463f26ea1e162360952dbc03c0a9bda807cd5 100644 (file)
@@ -65,8 +65,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
                        MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                       MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                       MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                       MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                       MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                pcie-controller {
                        status = "okay";
index 6e9820e141f8de5a850edcd7e74f0ea2a1acd1ed..b89e6cf1271a1370aead2a000e729cefad376ab0 100644 (file)
@@ -70,8 +70,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                pcie-controller {
                        status = "okay";
index 6ab33837a2b6d0a5aaf4e48763bb838451a346bd..6522b04f4a8e7c7a759100153d7d990f1fb160d3 100644 (file)
@@ -68,8 +68,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                internal-regs {
                        serial@12000 {
index 62175a8848bc2234343a8627d7cdf0ababfee851..d19f44c709257d9a35644d71cb49a04e354c42d8 100644 (file)
@@ -64,8 +64,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                pcie-controller {
                        status = "okay";
index a5db17782e085662d01a22683c5c364be5c25c8c..853bd392a4fe20155ed1469a23175213814ca9dc 100644 (file)
@@ -65,9 +65,9 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                devbus-bootcs {
                        status = "okay";
index 2391b11dc546b859dd3ab23a700be5a8a63ea83b..d17dab0a6f513e9c04ff2675f5f9d44d7f6953b3 100644 (file)
@@ -78,8 +78,8 @@
        soc {
                ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
-                         MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000
-                         MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>;
+                         MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
+                         MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
 
                pcie-controller {
                        status = "okay";
index c4d9175b90dceab5aa64ca1973505652bf325bee..f82aa44c3ceed540fef7e291c571c2b49c5b2e1f 100644 (file)
                               0x48485200 0x2E00>;
                        #address-cells = <1>;
                        #size-cells = <1>;
+
+                       /*
+                        * Do not allow gating of cpsw clock as workaround
+                        * for errata i877. Keeping internal clock disabled
+                        * causes the device switching characteristics
+                        * to degrade over time and eventually fail to meet
+                        * the data manual delay time/skew specs.
+                        */
+                       ti,no-idle;
+
                        /*
                         * rx_thresh_pend
                         * rx_pend
index d5525bfc7e3e61879d278ae08b446e185c206982..9156fc303afd8d278671c6d4d277d866fdd2ad7b 100644 (file)
@@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
 #ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 #else
index 7d0cba6f1cc5efadff31fd0cde2aca3279e120da..139791ed473d5264682c004ea2ea7af8ddd28f5d 100644 (file)
@@ -176,13 +176,13 @@ static struct resource mem_res[] = {
                .name = "Kernel code",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        },
        {
                .name = "Kernel data",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        }
 };
 
@@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
                res->name  = "System RAM";
                res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
                res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
                request_resource(&iomem_resource, res);
 
index 77d6b1bab278292e31a6435294e824a6a76648b9..ee06fabdf60e69e014317c9703787da5f31dc79f 100644 (file)
@@ -86,8 +86,8 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
 {
        dma_addr_t dma;
 
-       fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-               PANEL_SIZE, &dma, GFP_KERNEL);
+       fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, PANEL_SIZE, &dma,
+                                         GFP_KERNEL);
        if (!fb->fb.screen_base) {
                printk(KERN_ERR "CLCD: unable to map framebuffer\n");
                return -ENOMEM;
@@ -116,15 +116,14 @@ static int lpc32xx_clcd_setup(struct clcd_fb *fb)
 
 static int lpc32xx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-       return dma_mmap_writecombine(&fb->dev->dev, vma,
-               fb->fb.screen_base, fb->fb.fix.smem_start,
-               fb->fb.fix.smem_len);
+       return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+                          fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void lpc32xx_clcd_remove(struct clcd_fb *fb)
 {
-       dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-               fb->fb.screen_base, fb->fb.fix.smem_start);
+       dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+                   fb->fb.fix.smem_start);
 }
 
 /*
index d122ee6ab9913aa8e8f2896ea0fee44a01832068..8814ee5e98fd52ff75412fd863542954037dafb8 100644 (file)
@@ -42,8 +42,8 @@ int netx_clcd_setup(struct clcd_fb *fb)
 
        fb->panel = netx_panel;
 
-       fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, 1024*1024,
-                                                   &dma, GFP_KERNEL);
+       fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
+                                         GFP_KERNEL);
        if (!fb->fb.screen_base) {
                printk(KERN_ERR "CLCD: unable to map framebuffer\n");
                return -ENOMEM;
@@ -57,16 +57,14 @@ int netx_clcd_setup(struct clcd_fb *fb)
 
 int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-       return dma_mmap_writecombine(&fb->dev->dev, vma,
-                                    fb->fb.screen_base,
-                                    fb->fb.fix.smem_start,
-                                    fb->fb.fix.smem_len);
+       return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+                          fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void netx_clcd_remove(struct clcd_fb *fb)
 {
-       dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-                             fb->fb.screen_base, fb->fb.fix.smem_start);
+       dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+                   fb->fb.fix.smem_start);
 }
 
 static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);
index abea12617b173e2b34f794c4bdf98b8b5d518c48..ea0e5b2ca1cd49e6328faccbb467c1d7b669430f 100644 (file)
@@ -90,8 +90,8 @@ int nspire_clcd_setup(struct clcd_fb *fb)
        panel_size = ((panel->mode.xres * panel->mode.yres) * panel->bpp) / 8;
        panel_size = ALIGN(panel_size, PAGE_SIZE);
 
-       fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev,
-               panel_size, &dma, GFP_KERNEL);
+       fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, panel_size, &dma,
+                                         GFP_KERNEL);
 
        if (!fb->fb.screen_base) {
                pr_err("CLCD: unable to map framebuffer\n");
@@ -107,13 +107,12 @@ int nspire_clcd_setup(struct clcd_fb *fb)
 
 int nspire_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-       return dma_mmap_writecombine(&fb->dev->dev, vma,
-               fb->fb.screen_base, fb->fb.fix.smem_start,
-               fb->fb.fix.smem_len);
+       return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+                          fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void nspire_clcd_remove(struct clcd_fb *fb)
 {
-       dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-               fb->fb.screen_base, fb->fb.fix.smem_start);
+       dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+                   fb->fb.fix.smem_start);
 }
index e9f65fec55c0b9beacfdf694424e601b4673327e..b6d62e4cdfddaf53f8d3aa7c1768b5e83255673e 100644 (file)
@@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh)
  */
 static int _idle(struct omap_hwmod *oh)
 {
+       if (oh->flags & HWMOD_NO_IDLE) {
+               oh->_int_flags |= _HWMOD_SKIP_ENABLE;
+               return 0;
+       }
+
        pr_debug("omap_hwmod: %s: idling\n", oh->name);
 
        if (oh->_state != _HWMOD_STATE_ENABLED) {
@@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data)
                        oh->flags |= HWMOD_INIT_NO_RESET;
                if (of_find_property(np, "ti,no-idle-on-init", NULL))
                        oh->flags |= HWMOD_INIT_NO_IDLE;
+               if (of_find_property(np, "ti,no-idle", NULL))
+                       oh->flags |= HWMOD_NO_IDLE;
        }
 
        oh->_state = _HWMOD_STATE_INITIALIZED;
@@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh)
         * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data -
         * it should be set by the core code as a runtime flag during startup
         */
-       if ((oh->flags & HWMOD_INIT_NO_IDLE) &&
+       if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) &&
            (postsetup_state == _HWMOD_STATE_IDLE)) {
                oh->_int_flags |= _HWMOD_SKIP_ENABLE;
                postsetup_state = _HWMOD_STATE_ENABLED;
index 76bce11c85a40c477a5b87aab919d5b8dfded5e8..7c7a31169475b72bc8fbbe2f0dabf015b395f257 100644 (file)
@@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm {
  *     or idled.
  * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
  *     operate and they need to be handled at the same time as the main_clk.
+ * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain
+ *     IPs like CPSW on DRA7, where clocks to this module cannot be disabled.
  */
 #define HWMOD_SWSUP_SIDLE                      (1 << 0)
 #define HWMOD_SWSUP_MSTANDBY                   (1 << 1)
@@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm {
 #define HWMOD_SWSUP_SIDLE_ACT                  (1 << 12)
 #define HWMOD_RECONFIG_IO_CHAIN                        (1 << 13)
 #define HWMOD_OPT_CLKS_NEEDED                  (1 << 14)
+#define HWMOD_NO_IDLE                          (1 << 15)
 
 /*
  * omap_hwmod._int_flags definitions
index 04aff2c31b4607a99265d038d3ba0fd3b9a908b3..70f2f699bed3efe0802def3dc879cdd8e8857a05 100644 (file)
@@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg)
                if (ptr->child != NULL)
                        s3c_pm_run_res(ptr->child, fn, arg);
 
-               if ((ptr->flags & IORESOURCE_MEM) &&
-                   strcmp(ptr->name, "System RAM") == 0) {
+               if ((ptr->flags & IORESOURCE_SYSTEM_RAM)
+                               == IORESOURCE_SYSTEM_RAM) {
                        S3C_PMDBG("Found system RAM at %08lx..%08lx\n",
                                  (unsigned long)ptr->start,
                                  (unsigned long)ptr->end);
index b2b97e3e7babbbb37072b4185faa27a0e47760c8..a62a7b64f49c52706b8e133b1f786dc9dc34842d 100644 (file)
@@ -23,9 +23,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-       __PAGE_ALIGNED_DATA
-
        .globl vdso_start, vdso_end
+       .section .data..ro_after_init
        .balign PAGE_SIZE
 vdso_start:
        .incbin "arch/arm/vdso/vdso.so"
index 7fc294c3bc5baab31b13b1e23753d17835ce3b27..22dda613f9c91bd3bcfe3a8aad435f7384795401 100644 (file)
@@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #endif
index f5060867458024a5a61ccfd4a77330c728e13010..819aff5d593f701d1101d4cc42dbec8cedbc80aa 100644 (file)
@@ -40,7 +40,7 @@
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
  *     fixed mappings and modules
  */
-#define VMEMMAP_SIZE           ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE)
+#define VMEMMAP_SIZE           ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 
 #ifndef CONFIG_KASAN
 #define VMALLOC_START          (VA_START)
@@ -52,7 +52,8 @@
 #define VMALLOC_END            (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define VMEMMAP_START          (VMALLOC_END + SZ_64K)
-#define vmemmap                        ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
+#define vmemmap                        ((struct page *)VMEMMAP_START - \
+                                SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
 
 #define FIRST_USER_ADDRESS     0UL
 
index 8119479147db147c33800f76aa0d07c6072e8559..450987d99b9b9e497411a79f34781a3cb242df6e 100644 (file)
@@ -73,13 +73,13 @@ static struct resource mem_res[] = {
                .name = "Kernel code",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        },
        {
                .name = "Kernel data",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        }
 };
 
@@ -210,7 +210,7 @@ static void __init request_standard_resources(void)
                res->name  = "System RAM";
                res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
                res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
                request_resource(&iomem_resource, res);
 
index e33fe33876ab3804f2c6dcd6c5458e576596ef24..fd10eb6638689c1262c34cca7de533453cb383ae 100644 (file)
@@ -145,6 +145,10 @@ ENTRY(cpu_resume_mmu)
 ENDPROC(cpu_resume_mmu)
        .popsection
 cpu_resume_after_mmu:
+#ifdef CONFIG_KASAN
+       mov     x0, sp
+       bl      kasan_unpoison_remaining_stack
+#endif
        mov     x0, #0                  // return zero on success
        ldp     x19, x20, [sp, #16]
        ldp     x21, x22, [sp, #32]
index 82d607c3614ed8454d19f2da5d44a6cc914ee0b3..da30529bb1f65c9e3d5408b2e28ab31bc2283211 100644 (file)
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
                hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
        } else if (ps == PUD_SIZE) {
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-       } else if (ps == (PAGE_SIZE * CONT_PTES)) {
-               hugetlb_add_hstate(CONT_PTE_SHIFT);
-       } else if (ps == (PMD_SIZE * CONT_PMDS)) {
-               hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
        } else {
                pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
                return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
        return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-       if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-               hugetlb_add_hstate(CONT_PMD_SHIFT);
-       return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
index 209ae5ad34958044ae3d6addcafa090f890a8be7..e6928896da2a4a2879828df92d940a2ed321a4ea 100644 (file)
@@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = {
        .name   = "Kernel data",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_MEM,
+       .flags  = IORESOURCE_SYSTEM_RAM,
 };
 static struct resource __initdata kernel_code = {
        .name   = "Kernel code",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_MEM,
+       .flags  = IORESOURCE_SYSTEM_RAM,
        .sibling = &kernel_data,
 };
 
@@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end)
        new->start = start;
        new->end = end;
        new->name = "System RAM";
-       new->flags = IORESOURCE_MEM;
+       new->flags = IORESOURCE_SYSTEM_RAM;
 
        *pprev = new;
 }
index 72e17f7ebd6ff0fba193ac07190d42b41e106e76..786e36e2f61de91c8f446cc941cff895cdb91578 100644 (file)
@@ -281,8 +281,6 @@ notrace void __init machine_init(unsigned long dt_ptr)
         */
        set_ist(_vectors_start);
 
-       lockdep_init();
-
        /*
         * dtb is passed in from bootloader.
         * fdt is linked in blob.
index caae3f4e4341a40d86f4014871c1ebe929a937ae..300dac3702f11a13460d1954843bfcbd1ddc2034 100644 (file)
@@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
        efi_memory_desc_t *md;
        u64 efi_desc_size;
        char *name;
-       unsigned long flags;
+       unsigned long flags, desc;
 
        efi_map_start = __va(ia64_boot_param->efi_memmap);
        efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource,
                        continue;
 
                flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               desc = IORES_DESC_NONE;
+
                switch (md->type) {
 
                        case EFI_MEMORY_MAPPED_IO:
@@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource,
                                if (md->attribute & EFI_MEMORY_WP) {
                                        name = "System ROM";
                                        flags |= IORESOURCE_READONLY;
-                               } else if (md->attribute == EFI_MEMORY_UC)
+                               } else if (md->attribute == EFI_MEMORY_UC) {
                                        name = "Uncached RAM";
-                               else
+                               } else {
                                        name = "System RAM";
+                                       flags |= IORESOURCE_SYSRAM;
+                               }
                                break;
 
                        case EFI_ACPI_MEMORY_NVS:
                                name = "ACPI Non-volatile Storage";
+                               desc = IORES_DESC_ACPI_NV_STORAGE;
                                break;
 
                        case EFI_UNUSABLE_MEMORY:
@@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 
                        case EFI_PERSISTENT_MEMORY:
                                name = "Persistent Memory";
+                               desc = IORES_DESC_PERSISTENT_MEMORY;
                                break;
 
                        case EFI_RESERVED_TYPE:
@@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
                res->start = md->phys_addr;
                res->end = md->phys_addr + efi_md_size(md) - 1;
                res->flags = flags;
+               res->desc = desc;
 
                if (insert_resource(&iomem_resource, res) < 0)
                        kfree(res);
index 4f118b0d30915f4f5927719522414a66efe17f08..2029a38a72aeee89793d53ee60418567586d67ff 100644 (file)
@@ -80,17 +80,17 @@ unsigned long vga_console_membase;
 
 static struct resource data_resource = {
        .name   = "Kernel data",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
        .name   = "Kernel code",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
        .name   = "Kernel bss",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long ia64_max_cacheline_size;
index a5ecef7188baa39e6f21d7a5a7c5ed6cca8f040f..136c69f1fb8ab8b73c23e2a752ea371c6e484512 100644 (file)
@@ -70,14 +70,14 @@ static struct resource data_resource = {
        .name   = "Kernel data",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
        .name   = "Kernel code",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 unsigned long memory_start;
index 89a2a93949274b16b3508b314d0cbdf6db5946e6..f31ebb5dc26c21922f240545d038f0d6b5699300 100644 (file)
@@ -130,8 +130,6 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
        memset(__bss_start, 0, __bss_stop-__bss_start);
        memset(_ssbss, 0, _esbss-_ssbss);
 
-       lockdep_init();
-
 /* initialize device tree for usage in early_printk */
        early_init_devtree(_fdt_start);
 
index 74a3db92da1b52edc15165ac06d3b1558b8b78c0..d3da79dda629114e936af3e811348171c62a9bcd 100644 (file)
@@ -2169,7 +2169,7 @@ config MIPS_MT_SMP
        select CPU_MIPSR2_IRQ_VI
        select CPU_MIPSR2_IRQ_EI
        select SYNC_R4K
-       select MIPS_GIC_IPI
+       select MIPS_GIC_IPI if MIPS_GIC
        select MIPS_MT
        select SMP
        select SMP_UP
@@ -2267,7 +2267,7 @@ config MIPS_VPE_APSP_API_MT
 config MIPS_CMP
        bool "MIPS CMP framework support (DEPRECATED)"
        depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6
-       select MIPS_GIC_IPI
+       select MIPS_GIC_IPI if MIPS_GIC
        select SMP
        select SYNC_R4K
        select SYS_SUPPORTS_SMP
@@ -2287,7 +2287,7 @@ config MIPS_CPS
        select MIPS_CM
        select MIPS_CPC
        select MIPS_CPS_PM if HOTPLUG_CPU
-       select MIPS_GIC_IPI
+       select MIPS_GIC_IPI if MIPS_GIC
        select SMP
        select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
        select SYS_SUPPORTS_HOTPLUG_CPU
@@ -2306,6 +2306,7 @@ config MIPS_CPS_PM
        bool
 
 config MIPS_GIC_IPI
+       depends on MIPS_GIC
        bool
 
 config MIPS_CM
index 408799a839b42f2d9d7d1232bd7ea4efda0cf2d4..f7521142deda55fb439fa0857a2383b4dad7cc83 100644 (file)
@@ -17,7 +17,7 @@
 #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset))
 #endif
 
-#ifdef CONFIG_MACH_JZ4740
+#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780)
 #include <asm/mach-jz4740/base.h>
 #define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
 #endif
index 5fdaf8bdcd2ebeed8e31eb7825edd958bfc1c251..4f607341a7938867efc6f0f0646c218bf168421b 100644 (file)
@@ -732,21 +732,23 @@ static void __init resource_init(void)
                        end = HIGHMEM_START - 1;
 
                res = alloc_bootmem(sizeof(struct resource));
+
+               res->start = start;
+               res->end = end;
+               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
                switch (boot_mem_map.map[i].type) {
                case BOOT_MEM_RAM:
                case BOOT_MEM_INIT_RAM:
                case BOOT_MEM_ROM_DATA:
                        res->name = "System RAM";
+                       res->flags |= IORESOURCE_SYSRAM;
                        break;
                case BOOT_MEM_RESERVED:
                default:
                        res->name = "reserved";
                }
 
-               res->start = start;
-               res->end = end;
-
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
 
                /*
index bd4385a8e6e86f7fbb9ca6d988f5eee155b9a8c7..2b521e07b8601a2c80e888b3064009779effa78b 100644 (file)
@@ -121,6 +121,7 @@ static inline void calculate_cpu_foreign_map(void)
        cpumask_t temp_foreign_map;
 
        /* Re-calculate the mask */
+       cpumask_clear(&temp_foreign_map);
        for_each_online_cpu(i) {
                core_present = 0;
                for_each_cpu(k, &temp_foreign_map)
index 3d0e17bcc8e905ece06053ae15b3ff5443e6b033..df0f52bd18b457f9efa1a0b9fc470b0803f50632 100644 (file)
@@ -22,6 +22,9 @@
 
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init        __read_mostly
+
 void parisc_cache_init(void);  /* initializes cache-flushing */
 void disable_sr_hashing_asm(int); /* low level support for above */
 void disable_sr_hashing(void);   /* turns off space register hashing */
index 845272ce9cc587222e835131bb5ed96be0be03d4..7bd69bd43a018577d099f373346383900b1f0121 100644 (file)
@@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
        }
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
-#endif
-
 #include <asm/kmap_types.h>
 
 #define ARCH_HAS_KMAP
index 1b366c47768722081efe57a9db39eaba2a7dcde9..3c07d6b968772bc6de99bada8d83578ffe172c3d 100644 (file)
@@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
 
 static struct resource data_resource = {
        .name   = "Kernel data",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource code_resource = {
        .name   = "Kernel code",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource pdcdata_resource = {
@@ -201,7 +201,7 @@ static void __init setup_bootmem(void)
                res->name = "System RAM";
                res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT;
                res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
                request_resource(&iomem_resource, res);
        }
 
index ad8c9db61237223c3b807e3a9afed1487c44af1e..d544fa31175766bf48790f75bcd5458d6a505080 100644 (file)
@@ -114,8 +114,6 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 
 notrace void __init machine_init(u64 dt_ptr)
 {
-       lockdep_init();
-
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
index 5c03a6a9b0542fac3d042f2481f367aae9178d38..f98be8383a39994868ba41d0f8492e99f48eb1fe 100644 (file)
@@ -255,9 +255,6 @@ void __init early_setup(unsigned long dt_ptr)
        setup_paca(&boot_paca);
        fixup_boot_paca();
 
-       /* Initialize lockdep early or else spinlocks will blow */
-       lockdep_init();
-
        /* -------- printk is now safe to use ------- */
 
        /* Enable early debugging if any specified (see udbg.h) */
index 6ee26de9a1ded02e0e08f376fd612049eedee0b4..25ae2c9913c39c2fae34fb60dd7ee655ba821b6e 100644 (file)
@@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        std     r6, VCPU_ACOP(r9)
        stw     r7, VCPU_GUEST_PID(r9)
        std     r8, VCPU_WORT(r9)
+       /*
+        * Restore various registers to 0, where non-zero values
+        * set by the guest could disrupt the host.
+        */
+       li      r0, 0
+       mtspr   SPRN_IAMR, r0
+       mtspr   SPRN_CIABR, r0
+       mtspr   SPRN_DAWRX, r0
+       mtspr   SPRN_TCSCR, r0
+       mtspr   SPRN_WORT, r0
+       /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+       li      r0, 1
+       sldi    r0, r0, 31
+       mtspr   SPRN_MMCRS, r0
 8:
 
        /* Save and reset AMR and UAMOR before turning on the MMU */
index d0f0a514b04ed66da35d1ae6b27b964285ef2aec..f078a1f94fc267de0e4de530cd413608c2d93e5f 100644 (file)
@@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void)
                        res->name = "System RAM";
                        res->start = base;
                        res->end = base + size - 1;
-                       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+                       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
                        WARN_ON(request_resource(&iomem_resource, res) < 0);
                }
        }
index fb1b93ea3e3fead0efde9f30e819c55eecb88da9..e485817f7b1a97683e64ab04d1f7444e1aae2e1d 100644 (file)
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       spin_lock_init(&mm->context.list_lock);
+       INIT_LIST_HEAD(&mm->context.pgtable_list);
+       INIT_LIST_HEAD(&mm->context.gmap_list);
        cpumask_clear(&mm->context.cpu_attach_mask);
        atomic_set(&mm->context.attach_count, 0);
        mm->context.flush_mm = 0;
-       mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
-       mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #ifdef CONFIG_PGSTE
        mm->context.alloc_pgste = page_table_allocate_pgste;
        mm->context.has_pgste = 0;
        mm->context.use_skey = 0;
 #endif
-       mm->context.asce_limit = STACK_TOP_MAX;
+       if (mm->context.asce_limit == 0) {
+               /* context created by exec, set asce limit to 4TB */
+               mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+                       _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+               mm->context.asce_limit = STACK_TOP_MAX;
+       } else if (mm->context.asce_limit == (1UL << 31)) {
+               mm_inc_nr_pmds(mm);
+       }
        crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
        return 0;
 }
@@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev,
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
                                 struct mm_struct *mm)
 {
-       if (oldmm->context.asce_limit < mm->context.asce_limit)
-               crst_table_downgrade(mm, oldmm->context.asce_limit);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
index 7b7858f158b4574b549ab99648f977a3e249068c..d7cc79fb6191117a0ee0646ae6bbdf3a92a58132 100644 (file)
@@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-       spin_lock_init(&mm->context.list_lock);
-       INIT_LIST_HEAD(&mm->context.pgtable_list);
-       INIT_LIST_HEAD(&mm->context.gmap_list);
-       return (pgd_t *) crst_table_alloc(mm);
+       unsigned long *table = crst_table_alloc(mm);
+
+       if (!table)
+               return NULL;
+       if (mm->context.asce_limit == (1UL << 31)) {
+               /* Forking a compat process with 2 page table levels */
+               if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
+                       crst_table_free(mm, table);
+                       return NULL;
+               }
+       }
+       return (pgd_t *) table;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+       if (mm->context.asce_limit == (1UL << 31))
+               pgtable_pmd_page_dtor(virt_to_page(pgd));
+       crst_table_free(mm, (unsigned long *) pgd);
 }
-#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
 
 static inline void pmd_populate(struct mm_struct *mm,
                                pmd_t *pmd, pgtable_t pte)
index c55576bbaa1f7555479994d6848c9aa5b60f189b..a0684de5a93b99ae199f9bf6906327f4513f662e 100644 (file)
@@ -448,7 +448,6 @@ void __init startup_init(void)
        rescue_initrd();
        clear_bss_section();
        init_kernel_storage_key();
-       lockdep_init();
        lockdep_off();
        setup_lowcore_early();
        setup_facility_list();
index c5febe84eba633385cd7a8c09bee87e484c2e05c..03c2b469c4725172e4c6750cb8bcb0fb4edea075 100644 (file)
@@ -16,7 +16,7 @@
 
 __HEAD
 ENTRY(startup_continue)
-       tm      __LC_STFLE_FAC_LIST+6,0x80      # LPP available ?
+       tm      __LC_STFLE_FAC_LIST+5,0x80      # LPP available ?
        jz      0f
        xc      __LC_LPP+1(7,0),__LC_LPP+1      # clear lpp and current_pid
        mvi     __LC_LPP,0x80                   #   and set LPP_MAGIC
index 9220db5c996aa5250ca4d904361c51cec33d7827..cedb0198675f7577c7237bb61c006acaf0e984d4 100644 (file)
@@ -374,17 +374,17 @@ static void __init setup_lowcore(void)
 
 static struct resource code_resource = {
        .name  = "Kernel code",
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
        .name = "Kernel data",
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
        .name = "Kernel bss",
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource __initdata *standard_resources[] = {
@@ -408,7 +408,7 @@ static void __init setup_resources(void)
 
        for_each_memblock(memory, reg) {
                res = alloc_bootmem_low(sizeof(*res));
-               res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+               res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
                res->name = "System RAM";
                res->start = reg->base;
index 4af21c771f9b3925bf860d0dc86cebcc803cf965..03dfe9c667f4eb944705787e54ff7e6ac3c08afb 100644 (file)
@@ -2381,7 +2381,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
 
        /* manually convert vector registers if necessary */
        if (MACHINE_HAS_VX) {
-               convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+               convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                     fprs, 128);
        } else {
index b48459afefddb835582e548f5b8d7b4ffd026e0d..f3a0649ab5217ac5f87b83580723dfda40fff3de 100644 (file)
@@ -101,7 +101,7 @@ static void __init resource_init(void)
        res->name = "System RAM";
        res->start = MEMORY_START;
        res->end = MEMORY_START + MEMORY_SIZE - 1;
-       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        request_resource(&iomem_resource, res);
 
        request_resource(res, &code_resource);
index de19cfa768f208708406e3350b5e9aecc92a6266..3f1c18b28e8af5fc414c13ebeb29ccbf04cd63a5 100644 (file)
@@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, };
 
 static struct resource code_resource = {
        .name = "Kernel code",
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource data_resource = {
        .name = "Kernel data",
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 static struct resource bss_resource = {
        .name   = "Kernel bss",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM,
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
 };
 
 unsigned long memory_start;
@@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
        res->name = "System RAM";
        res->start = start;
        res->end = end - 1;
-       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
        if (request_resource(&iomem_resource, res)) {
                pr_err("unable to request memory_resource 0x%lx 0x%lx\n",
index f2d30cab5b3f388fa9b446cf5b12afda92fa0a9d..cd1f592cd3479f8c94c599bfc589087184d4d180 100644 (file)
@@ -696,14 +696,6 @@ tlb_fixup_done:
        call    __bzero
         sub    %o1, %o0, %o1
 
-#ifdef CONFIG_LOCKDEP
-       /* We have this call this super early, as even prom_init can grab
-        * spinlocks and thus call into the lockdep code.
-        */
-       call    lockdep_init
-        nop
-#endif
-
        call    prom_init
         mov    %l7, %o0                        ! OpenPROM cif handler
 
index 6f216853f2724f8395bdd36ab18c4eadbeae0366..1cfe6aab7a11572d54f6848fe4656b7b40af8cf2 100644 (file)
@@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs)
 
 static struct resource code_resource = {
        .name   = "Kernel code",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource data_resource = {
        .name   = "Kernel data",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
        .name   = "Kernel bss",
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static inline resource_size_t compute_kern_paddr(void *addr)
@@ -2909,7 +2909,7 @@ static int __init report_memory(void)
                res->name = "System RAM";
                res->start = pavail[i].phys_addr;
                res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
-               res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+               res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
                if (insert_resource(&iomem_resource, res) < 0) {
                        pr_warn("Resource insertion failed.\n");
index bbb855de6569841200c991bf610b090c4e94c47b..a992238e9b58260fdfa067c4cc2c0d348ceda41f 100644 (file)
@@ -1632,14 +1632,14 @@ static struct resource data_resource = {
        .name   = "Kernel data",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
        .name   = "Kernel code",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 /*
@@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved)
                kzalloc(sizeof(struct resource), GFP_ATOMIC);
        if (!res)
                return NULL;
-       res->name = reserved ? "Reserved" : "System RAM";
        res->start = start_pfn << PAGE_SHIFT;
        res->end = (end_pfn << PAGE_SHIFT) - 1;
        res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+       if (reserved) {
+               res->name = "Reserved";
+       } else {
+               res->name = "System RAM";
+               res->flags |= IORESOURCE_SYSRAM;
+       }
        if (insert_resource(&iomem_resource, res)) {
                kfree(res);
                return NULL;
index 3fa317f96122130d30cfc7be41d4e1450fc56a02..c2bffa5614a48102a1d66a60865a2e3474a49597 100644 (file)
@@ -72,13 +72,13 @@ static struct resource mem_res[] = {
                .name = "Kernel code",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        },
        {
                .name = "Kernel data",
                .start = 0,
                .end = 0,
-               .flags = IORESOURCE_MEM
+               .flags = IORESOURCE_SYSTEM_RAM
        }
 };
 
@@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi)
                res->name  = "System RAM";
                res->start = mi->bank[i].start;
                res->end   = mi->bank[i].start + mi->bank[i].size - 1;
-               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
                request_resource(&iomem_resource, res);
 
index 1538562cc720e78132d0eb31c38116b029d56ce0..eb3abf8ac44eb33f333ed29727dfd49ba6bfbf38 100644 (file)
@@ -1,6 +1,7 @@
-
 obj-y += entry/
 
+obj-$(CONFIG_PERF_EVENTS) += events/
+
 obj-$(CONFIG_KVM) += kvm/
 
 # Xen paravirtualization support
index c46662f64c392050d1c45b722e3de7c01f87e250..b1051057e5b014b6a2b11b5c2cdbef866633e0c0 100644 (file)
@@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES
 config FIX_EARLYCON_MEM
        def_bool y
 
+config DEBUG_RODATA
+       def_bool y
+
 config PGTABLE_LEVELS
        int
        default 4 if X86_64
index 9b18ed97a8a2968b7b293bc83484d8311d830e83..7816b7b276f463b49b957dcbd40d9f38ba6c91e9 100644 (file)
@@ -74,28 +74,16 @@ config EFI_PGT_DUMP
          issues with the mapping of the EFI runtime regions into that
          table.
 
-config DEBUG_RODATA
-       bool "Write protect kernel read-only data structures"
-       default y
-       depends on DEBUG_KERNEL
-       ---help---
-         Mark the kernel read-only data as write-protected in the pagetables,
-         in order to catch accidental (and incorrect) writes to such const
-         data. This is recommended so that we can catch kernel bugs sooner.
-         If in doubt, say "Y".
-
 config DEBUG_RODATA_TEST
-       bool "Testcase for the DEBUG_RODATA feature"
-       depends on DEBUG_RODATA
+       bool "Testcase for marking rodata read-only"
        default y
        ---help---
-         This option enables a testcase for the DEBUG_RODATA
-         feature as well as for the change_page_attr() infrastructure.
+         This option enables a testcase for setting rodata read-only
+         as well as for the change_page_attr() infrastructure.
          If in doubt, say "N"
 
 config DEBUG_WX
        bool "Warn on W+X mappings at boot"
-       depends on DEBUG_RODATA
        select X86_PTDUMP_CORE
        ---help---
          Generate a warning if any W+X mappings are found at boot.
index 0224987556ce80bd606063b56ef124b0857a3f44..3f69326ed545719aedd9e11b1be0fdea104136bc 100644 (file)
@@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
        fprintf(outfile, "#include <asm/vdso.h>\n");
        fprintf(outfile, "\n");
        fprintf(outfile,
-               "static unsigned char raw_data[%lu] __page_aligned_data = {",
+               "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
                mapping_size);
        for (j = 0; j < stripped_len; j++) {
                if (j % 10 == 0)
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
new file mode 100644 (file)
index 0000000..fdfea15
--- /dev/null
@@ -0,0 +1,13 @@
+obj-y                  += core.o
+
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
+obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
+endif
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/core.o intel/bts.o intel/cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/cstate.o intel/ds.o intel/knc.o 
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
+obj-$(CONFIG_CPU_SUP_INTEL)            += intel/rapl.o msr.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
new file mode 100644 (file)
index 0000000..049ada8
--- /dev/null
@@ -0,0 +1,731 @@
+#include <linux/perf_event.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <asm/apicdef.h>
+
+#include "../perf_event.h"
+
+static __initconst const u64 amd_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+               [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
+               [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
+               [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
+               [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
+               [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
+               [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
+               [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
+               [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+/*
+ * AMD Performance Monitor K7 and later.
+ */
+static const u64 amd_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]                   = 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]                 = 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]          = 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]                        = 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]      = 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]       = 0x00d1, /* "Dispatch stalls" event */
+};
+
+static u64 amd_pmu_event_map(int hw_event)
+{
+       return amd_perfmon_event_map[hw_event];
+}
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ *   4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ *   6 counters starting at 0xc0010200 each offset by 2
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
+{
+       int offset;
+
+       if (!index)
+               return index;
+
+       if (eventsel)
+               offset = event_offsets[index];
+       else
+               offset = count_offsets[index];
+
+       if (offset)
+               return offset;
+
+       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               offset = index;
+       else
+               offset = index << 1;
+
+       if (eventsel)
+               event_offsets[index] = offset;
+       else
+               count_offsets[index] = offset;
+
+       return offset;
+}
+
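As a quick illustration of the offset scheme described in the comment above, the counter-control MSR addresses work out as below. This standalone sketch assumes the usual base values MSR_K7_EVNTSEL0 = 0xc0010000 and MSR_F15H_PERF_CTL = 0xc0010200; it is not part of the patch.

#include <stdio.h>

int main(void)
{
        const unsigned int k7_evntsel0   = 0xc0010000; /* legacy base MSR         */
        const unsigned int f15h_perf_ctl = 0xc0010200; /* core-extension base MSR */
        int i;

        for (i = 0; i < 4; i++)         /* legacy: offset = index            */
                printf("legacy EVNTSEL%d  = %#x\n", i, k7_evntsel0 + i);
        for (i = 0; i < 6; i++)         /* perfctr-core: offset = index << 1 */
                printf("f15h   PERF_CTL%d = %#x\n", i, f15h_perf_ctl + (i << 1));
        return 0;
}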
+static int amd_core_hw_config(struct perf_event *event)
+{
+       if (event->attr.exclude_host && event->attr.exclude_guest)
+               /*
+                * When HO == GO == 1 the hardware treats that as GO == HO == 0
+                * and will count in both modes. We don't want to count in that
+                * case so we emulate no-counting by setting US = OS = 0.
+                */
+               event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+                                     ARCH_PERFMON_EVENTSEL_OS);
+       else if (event->attr.exclude_host)
+               event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
+       else if (event->attr.exclude_guest)
+               event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
+
+       return 0;
+}
+
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+       return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
+static inline int amd_is_nb_event(struct hw_perf_event *hwc)
+{
+       return (hwc->config & 0xe0) == 0xe0;
+}
+
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+       struct amd_nb *nb = cpuc->amd_nb;
+
+       return nb && nb->nb_id != -1;
+}
+
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+       int ret;
+
+       /* pass precise event sampling to ibs: */
+       if (event->attr.precise_ip && get_ibs_caps())
+               return -ENOENT;
+
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       ret = x86_pmu_hw_config(event);
+       if (ret)
+               return ret;
+
+       if (event->attr.type == PERF_TYPE_RAW)
+               event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+       return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+                                          struct perf_event *event)
+{
+       struct amd_nb *nb = cpuc->amd_nb;
+       int i;
+
+       /*
+        * need to scan whole list because event may not have
+        * been assigned during scheduling
+        *
+        * no race condition possible because event can only
+        * be removed on one CPU at a time AND PMU is disabled
+        * when we come here
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (cmpxchg(nb->owners + i, event, NULL) == event)
+                       break;
+       }
+}
+
+ /*
+  * AMD64 NorthBridge events need special treatment because
+  * counter access needs to be synchronized across all cores
+  * of a package. Refer to BKDG section 3.12
+  *
+  * NB events are events measuring L3 cache and HyperTransport
+  * traffic. They are identified by an event code >= 0xe00.
+  * They measure events on the NorthBridge, which is shared
+  * by all cores on a package. NB events are counted on a
+  * shared set of counters. When a NB event is programmed
+  * in a counter, the data actually comes from a shared
+  * counter. Thus, access to those counters needs to be
+  * synchronized.
+  *
+  * We implement the synchronization such that no two cores
+  * can be measuring NB events using the same counters. Thus,
+  * we maintain a per-NB allocation table. The available slot
+  * is propagated using the event_constraint structure.
+  *
+  * We provide only one choice for each NB event based on
+  * the fact that only NB events have restrictions. Consequently,
+  * if a counter is available, there is a guarantee the NB event
+  * will be assigned to it. If no slot is available, an empty
+  * constraint is returned and scheduling will eventually fail
+  * for this event.
+  *
+  * Note that all cores attached to the same NB compete for the same
+  * counters to host NB events, which is why we use atomic ops. Some
+  * multi-chip CPUs may have more than one NB.
+  *
+  * Given that resources are allocated (cmpxchg), they must be
+  * eventually freed for others to use. This is accomplished by
+  * calling __amd_put_nb_event_constraints()
+  *
+  * Non NB events are not impacted by this restriction.
+  */
+static struct event_constraint *
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                              struct event_constraint *c)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct amd_nb *nb = cpuc->amd_nb;
+       struct perf_event *old;
+       int idx, new = -1;
+
+       if (!c)
+               c = &unconstrained;
+
+       if (cpuc->is_fake)
+               return c;
+
+       /*
+        * detect if already present, if so reuse
+        *
+        * cannot merge with actual allocation
+        * because of possible holes
+        *
+        * event can already be present yet not assigned (in hwc->idx)
+        * because of successive calls to x86_schedule_events() from
+        * hw_perf_group_sched_in() without hw_perf_enable()
+        */
+       for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+               if (new == -1 || hwc->idx == idx)
+                       /* assign free slot, prefer hwc->idx */
+                       old = cmpxchg(nb->owners + idx, NULL, event);
+               else if (nb->owners[idx] == event)
+                       /* event already present */
+                       old = event;
+               else
+                       continue;
+
+               if (old && old != event)
+                       continue;
+
+               /* reassign to this slot */
+               if (new != -1)
+                       cmpxchg(nb->owners + new, event, NULL);
+               new = idx;
+
+               /* already present, reuse */
+               if (old == event)
+                       break;
+       }
+
+       if (new == -1)
+               return &emptyconstraint;
+
+       return &nb->event_constraints[new];
+}
+
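The ownership protocol described in the comment above boils down to a per-NB array of owner pointers that are claimed and released with cmpxchg(). A condensed sketch of just that idea (hypothetical helper names, not part of the patch):

#include <linux/atomic.h>
#include <linux/perf_event.h>

struct nb_owner_table {
        struct perf_event *owners[6];   /* one slot per shared NB counter */
};

/* claim slot i for ev; succeeds only if the slot was free */
static bool nb_claim_slot(struct nb_owner_table *nb, struct perf_event *ev, int i)
{
        return cmpxchg(&nb->owners[i], NULL, ev) == NULL;
}

/* release slot i; only the current owner clears it */
static void nb_release_slot(struct nb_owner_table *nb, struct perf_event *ev, int i)
{
        cmpxchg(&nb->owners[i], ev, NULL);
}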
+static struct amd_nb *amd_alloc_nb(int cpu)
+{
+       struct amd_nb *nb;
+       int i;
+
+       nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
+       if (!nb)
+               return NULL;
+
+       nb->nb_id = -1;
+
+       /*
+        * initialize all possible NB constraints
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               __set_bit(i, nb->event_constraints[i].idxmsk);
+               nb->event_constraints[i].weight = 1;
+       }
+       return nb;
+}
+
+static int amd_pmu_cpu_prepare(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       WARN_ON_ONCE(cpuc->amd_nb);
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return NOTIFY_OK;
+
+       cpuc->amd_nb = amd_alloc_nb(cpu);
+       if (!cpuc->amd_nb)
+               return NOTIFY_BAD;
+
+       return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
+       struct amd_nb *nb;
+       int i, nb_id;
+
+       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return;
+
+       nb_id = amd_get_nb_id(cpu);
+       WARN_ON_ONCE(nb_id == BAD_APICID);
+
+       for_each_online_cpu(i) {
+               nb = per_cpu(cpu_hw_events, i).amd_nb;
+               if (WARN_ON_ONCE(!nb))
+                       continue;
+
+               if (nb->nb_id == nb_id) {
+                       *onln = cpuc->amd_nb;
+                       cpuc->amd_nb = nb;
+                       break;
+               }
+       }
+
+       cpuc->amd_nb->nb_id = nb_id;
+       cpuc->amd_nb->refcnt++;
+}
+
+static void amd_pmu_cpu_dead(int cpu)
+{
+       struct cpu_hw_events *cpuhw;
+
+       if (boot_cpu_data.x86_max_cores < 2)
+               return;
+
+       cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+       if (cpuhw->amd_nb) {
+               struct amd_nb *nb = cpuhw->amd_nb;
+
+               if (nb->nb_id == -1 || --nb->refcnt == 0)
+                       kfree(nb);
+
+               cpuhw->amd_nb = NULL;
+       }
+}
+
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       /*
+        * if not NB event or no NB, then no constraints
+        */
+       if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+               return &unconstrained;
+
+       return __amd_get_nb_event_constraints(cpuc, event, NULL);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+                                     struct perf_event *event)
+{
+       if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+               __amd_put_nb_event_constraints(cpuc, event);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *amd_format_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK    0x000000F0ULL
+
+#define AMD_EVENT_FP           0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS           0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC           0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU           0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE                0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS                0x000000C0ULL
+#define AMD_EVENT_DE           0x000000D0ULL
+#define AMD_EVENT_NB           0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000       FP      PERF_CTL[5:3]
+ * 0x010       FP      PERF_CTL[5:3]
+ * 0x020       LS      PERF_CTL[5:0]
+ * 0x030       LS      PERF_CTL[5:0]
+ * 0x040       DC      PERF_CTL[5:0]
+ * 0x050       DC      PERF_CTL[5:0]
+ * 0x060       CU      PERF_CTL[2:0]
+ * 0x070       CU      PERF_CTL[2:0]
+ * 0x080       IC/DE   PERF_CTL[2:0]
+ * 0x090       IC/DE   PERF_CTL[2:0]
+ * 0x0A0       ---
+ * 0x0B0       ---
+ * 0x0C0       EX/LS   PERF_CTL[5:0]
+ * 0x0D0       DE      PERF_CTL[2:0]
+ * 0x0E0       NB      NB_PERF_CTL[3:0]
+ * 0x0F0       NB      NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x000       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x003       FP      PERF_CTL[3]
+ * 0x004       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
+ * 0x00B       FP      PERF_CTL[3]
+ * 0x00D       FP      PERF_CTL[3]
+ * 0x023       DE      PERF_CTL[2:0]
+ * 0x02D       LS      PERF_CTL[3]
+ * 0x02E       LS      PERF_CTL[3,0]
+ * 0x031       LS      PERF_CTL[2:0] (**)
+ * 0x043       CU      PERF_CTL[2:0]
+ * 0x045       CU      PERF_CTL[2:0]
+ * 0x046       CU      PERF_CTL[2:0]
+ * 0x054       CU      PERF_CTL[2:0]
+ * 0x055       CU      PERF_CTL[2:0]
+ * 0x08F       IC      PERF_CTL[0]
+ * 0x187       DE      PERF_CTL[0]
+ * 0x188       DE      PERF_CTL[0]
+ * 0x0DB       EX      PERF_CTL[5:0]
+ * 0x0DC       LS      PERF_CTL[5:0]
+ * 0x0DD       LS      PERF_CTL[5:0]
+ * 0x0DE       LS      PERF_CTL[5:0]
+ * 0x0DF       LS      PERF_CTL[5:0]
+ * 0x1C0       EX      PERF_CTL[5:3]
+ * 0x1D6       EX      PERF_CTL[5:0]
+ * 0x1D8       EX      PERF_CTL[5:0]
+ *
+ * (*)  depending on the umask all FPU counters may be used
+ * (**) only one unitmask enabled at a time
+ */
+
+static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
+                              struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned int event_code = amd_get_event_code(hwc);
+
+       switch (event_code & AMD_EVENT_TYPE_MASK) {
+       case AMD_EVENT_FP:
+               switch (event_code) {
+               case 0x000:
+                       if (!(hwc->config & 0x0000F000ULL))
+                               break;
+                       if (!(hwc->config & 0x00000F00ULL))
+                               break;
+                       return &amd_f15_PMC3;
+               case 0x004:
+                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+                               break;
+                       return &amd_f15_PMC3;
+               case 0x003:
+               case 0x00B:
+               case 0x00D:
+                       return &amd_f15_PMC3;
+               }
+               return &amd_f15_PMC53;
+       case AMD_EVENT_LS:
+       case AMD_EVENT_DC:
+       case AMD_EVENT_EX_LS:
+               switch (event_code) {
+               case 0x023:
+               case 0x043:
+               case 0x045:
+               case 0x046:
+               case 0x054:
+               case 0x055:
+                       return &amd_f15_PMC20;
+               case 0x02D:
+                       return &amd_f15_PMC3;
+               case 0x02E:
+                       return &amd_f15_PMC30;
+               case 0x031:
+                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+                               return &amd_f15_PMC20;
+                       return &emptyconstraint;
+               case 0x1C0:
+                       return &amd_f15_PMC53;
+               default:
+                       return &amd_f15_PMC50;
+               }
+       case AMD_EVENT_CU:
+       case AMD_EVENT_IC_DE:
+       case AMD_EVENT_DE:
+               switch (event_code) {
+               case 0x08F:
+               case 0x187:
+               case 0x188:
+                       return &amd_f15_PMC0;
+               case 0x0DB ... 0x0DF:
+               case 0x1D6:
+               case 0x1D8:
+                       return &amd_f15_PMC50;
+               default:
+                       return &amd_f15_PMC20;
+               }
+       case AMD_EVENT_NB:
+               /* moved to perf_event_amd_uncore.c */
+               return &emptyconstraint;
+       default:
+               return &emptyconstraint;
+       }
+}
+
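Ignoring the per-event exceptions listed in the table above, the type nibble of the event code already determines the usable counter mask. A small worked example of that mapping (standalone sketch, not from the patch; the masks correspond to the amd_f15_PMC* constraints defined above):

#include <stdio.h>

static unsigned int f15h_counter_mask(unsigned int event_code)
{
        switch (event_code & 0x0F0) {
        case 0x000: case 0x010:                 /* FP     -> PERF_CTL[5:3]   */
                return 0x38;
        case 0x060: case 0x070:                 /* CU     -> PERF_CTL[2:0]   */
        case 0x080: case 0x090:                 /* IC/DE  -> PERF_CTL[2:0]   */
        case 0x0D0:                             /* DE     -> PERF_CTL[2:0]   */
                return 0x07;
        case 0x0E0: case 0x0F0:                 /* NB     -> uncore PMU      */
                return 0x00;
        default:                                /* LS/DC/EX -> PERF_CTL[5:0] */
                return 0x3F;
        }
}

int main(void)
{
        printf("event 0x0C0 -> counter mask %#x\n", f15h_counter_mask(0x0C0)); /* 0x3f */
        printf("event 0x076 -> counter mask %#x\n", f15h_counter_mask(0x076)); /* 0x07 */
        return 0;
}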
+static ssize_t amd_event_sysfs_show(char *page, u64 config)
+{
+       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+                   (config & AMD64_EVENTSEL_EVENT) >> 24;
+
+       return x86_event_sysfs_show(page, config, event);
+}
+
+static __initconst const struct x86_pmu amd_pmu = {
+       .name                   = "AMD",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = x86_pmu_enable_all,
+       .enable                 = x86_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = amd_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_K7_EVNTSEL0,
+       .perfctr                = MSR_K7_PERFCTR0,
+       .addr_offset            = amd_pmu_addr_offset,
+       .event_map              = amd_pmu_event_map,
+       .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
+       .num_counters           = AMD64_NUM_COUNTERS,
+       .cntval_bits            = 48,
+       .cntval_mask            = (1ULL << 48) - 1,
+       .apic                   = 1,
+       /* use highest bit to detect overflow */
+       .max_period             = (1ULL << 47) - 1,
+       .get_event_constraints  = amd_get_event_constraints,
+       .put_event_constraints  = amd_put_event_constraints,
+
+       .format_attrs           = amd_format_attr,
+       .events_sysfs_show      = amd_event_sysfs_show,
+
+       .cpu_prepare            = amd_pmu_cpu_prepare,
+       .cpu_starting           = amd_pmu_cpu_starting,
+       .cpu_dead               = amd_pmu_cpu_dead,
+};
+
+static int __init amd_core_pmu_init(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               return 0;
+
+       switch (boot_cpu_data.x86) {
+       case 0x15:
+               pr_cont("Fam15h ");
+               x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+               break;
+
+       default:
+               pr_err("core perfctr but no constraints; unknown hardware!\n");
+               return -ENODEV;
+       }
+
+       /*
+        * If core performance counter extensions exist, we must use
+        * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
+        * amd_pmu_addr_offset().
+        */
+       x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
+       x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
+       x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
+
+       pr_cont("core perfctr, ");
+       return 0;
+}
+
+__init int amd_pmu_init(void)
+{
+       int ret;
+
+       /* Performance-monitoring supported from K7 and later: */
+       if (boot_cpu_data.x86 < 6)
+               return -ENODEV;
+
+       x86_pmu = amd_pmu;
+
+       ret = amd_core_pmu_init();
+       if (ret)
+               return ret;
+
+       /* Events are common for all AMDs */
+       memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
+              sizeof(hw_cache_event_ids));
+
+       return 0;
+}
+
+void amd_pmu_enable_virt(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       cpuc->perf_ctr_virt_mask = 0;
+
+       /* Reload all events */
+       x86_pmu_disable_all();
+       x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
+
+void amd_pmu_disable_virt(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * We only mask out the Host-only bit so that host-only counting works
+        * when SVM is disabled. If someone sets up a guest-only counter when
+        * SVM is disabled, the Guest-only bit still gets set and the counter
+        * will not count anything.
+        */
+       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+
+       /* Reload all events */
+       x86_pmu_disable_all();
+       x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
new file mode 100644 (file)
index 0000000..51087c2
--- /dev/null
@@ -0,0 +1,959 @@
+/*
+ * Performance events - AMD IBS
+ *
+ *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ptrace.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+static u32 ibs_caps;
+
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
+
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK  (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK     IBS_OP_MAX_CNT
+
+enum ibs_states {
+       IBS_ENABLED     = 0,
+       IBS_STARTED     = 1,
+       IBS_STOPPING    = 2,
+
+       IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+       struct perf_event       *event;
+       unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+       struct pmu                      pmu;
+       unsigned int                    msr;
+       u64                             config_mask;
+       u64                             cnt_mask;
+       u64                             enable_mask;
+       u64                             valid_mask;
+       u64                             max_period;
+       unsigned long                   offset_mask[1];
+       int                             offset_max;
+       struct cpu_perf_ibs __percpu    *pcpu;
+
+       struct attribute                **format_attrs;
+       struct attribute_group          format_group;
+       const struct attribute_group    *attr_groups[2];
+
+       u64                             (*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+       u32             size;
+       union {
+               u32     data[0];        /* data buffer starts here */
+               u32     caps;
+       };
+       u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int overflow = 0;
+
+       /*
+        * If we are way outside a reasonable range then just skip forward:
+        */
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               overflow = 1;
+       }
+
+       if (unlikely(left < (s64)min)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               overflow = 1;
+       }
+
+       /*
+        * If the hw period that triggers the sw overflow is too short
+        * we might hit the irq handler. This biases the results.
+        * Thus we shorten the next-to-last period and set the last
+        * period to the max period.
+        */
+       if (left > max) {
+               left -= max;
+               if (left > max)
+                       left = max;
+               else if (left < min)
+                       left = min;
+       }
+
+       *hw_period = (u64)left;
+
+       return overflow;
+}
+
+static  int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int shift = 64 - width;
+       u64 prev_raw_count;
+       u64 delta;
+
+       /*
+        * Careful: an NMI might modify the previous event value.
+        *
+        * Our tactic to handle this is to first atomically read and
+        * exchange a new raw count - then add that new-prev delta
+        * count to the generic event atomically:
+        */
+       prev_raw_count = local64_read(&hwc->prev_count);
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       new_raw_count) != prev_raw_count)
+               return 0;
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+
+       return 1;
+}
+
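The shift trick in perf_event_try_update() above sign-extends the raw hardware count so that the computed delta stays correct across a counter rollover. A tiny worked example with a 48-bit counter (standalone sketch, not from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const int width = 48, shift = 64 - width;
        uint64_t prev = 0x0000ffffffffffffULL;  /* count just before a rollover  */
        uint64_t now  = 0x0000000000000005ULL;  /* count read after the rollover */
        uint64_t delta;

        delta = (now << shift) - (prev << shift);
        delta >>= shift;                        /* 6 events, despite the wrap    */

        printf("delta = %llu\n", (unsigned long long)delta);
        return 0;
}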
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+       if (perf_ibs_fetch.pmu.type == type)
+               return &perf_ibs_fetch;
+       if (perf_ibs_op.pmu.type == type)
+               return &perf_ibs_op;
+       return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
+ *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+       switch (event->attr.precise_ip) {
+       case 0:
+               return -ENOENT;
+       case 1:
+       case 2:
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+               switch (event->attr.config) {
+               case PERF_COUNT_HW_CPU_CYCLES:
+                       *config = 0;
+                       return 0;
+               }
+               break;
+       case PERF_TYPE_RAW:
+               switch (event->attr.config) {
+               case 0x0076:
+                       *config = 0;
+                       return 0;
+               case 0x00C1:
+                       *config = IBS_OP_CNT_CTL;
+                       return 0;
+               }
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static const struct perf_event_attr ibs_notsupp = {
+       .exclude_user   = 1,
+       .exclude_kernel = 1,
+       .exclude_hv     = 1,
+       .exclude_idle   = 1,
+       .exclude_host   = 1,
+       .exclude_guest  = 1,
+};
+
+static int perf_ibs_init(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs;
+       u64 max_cnt, config;
+       int ret;
+
+       perf_ibs = get_ibs_pmu(event->attr.type);
+       if (perf_ibs) {
+               config = event->attr.config;
+       } else {
+               perf_ibs = &perf_ibs_op;
+               ret = perf_ibs_precise_event(event, &config);
+               if (ret)
+                       return ret;
+       }
+
+       if (event->pmu != &perf_ibs->pmu)
+               return -ENOENT;
+
+       if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+               return -EINVAL;
+
+       if (config & ~perf_ibs->config_mask)
+               return -EINVAL;
+
+       if (hwc->sample_period) {
+               if (config & perf_ibs->cnt_mask)
+                       /* raw max_cnt may not be set */
+                       return -EINVAL;
+               if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+                       /*
+                        * lower 4 bits cannot be set in ibs max cnt,
+                        * but we allow it in case we adjust the
+                        * sample period to set a frequency.
+                        */
+                       return -EINVAL;
+               hwc->sample_period &= ~0x0FULL;
+               if (!hwc->sample_period)
+                       hwc->sample_period = 0x10;
+       } else {
+               max_cnt = config & perf_ibs->cnt_mask;
+               config &= ~perf_ibs->cnt_mask;
+               event->attr.sample_period = max_cnt << 4;
+               hwc->sample_period = event->attr.sample_period;
+       }
+
+       if (!hwc->sample_period)
+               return -EINVAL;
+
+       /*
+        * If we modify hwc->sample_period, we also need to update
+        * hwc->last_period and hwc->period_left.
+        */
+       hwc->last_period = hwc->sample_period;
+       local64_set(&hwc->period_left, hwc->sample_period);
+
+       hwc->config_base = perf_ibs->msr;
+       hwc->config = config;
+
+       return 0;
+}
+
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+                              struct hw_perf_event *hwc, u64 *period)
+{
+       int overflow;
+
+       /* ignore lower 4 bits in min count: */
+       overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+       local64_set(&hwc->prev_count, 0);
+
+       return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+       return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+       u64 count = 0;
+
+       if (config & IBS_OP_VAL)
+               count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+       if (ibs_caps & IBS_CAPS_RDWROPCNT)
+               count += (config & IBS_OP_CUR_CNT) >> 32;
+
+       return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+                     u64 *config)
+{
+       u64 count = perf_ibs->get_count(*config);
+
+       /*
+        * Set width to 64 since we do not overflow on max width but
+        * instead on max count. In perf_ibs_set_period() we clear
+        * prev count manually on overflow.
+        */
+       while (!perf_event_try_update(event, count, 64)) {
+               rdmsrl(event->hw.config_base, *config);
+               count = perf_ibs->get_count(*config);
+       }
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+                                        struct hw_perf_event *hwc, u64 config)
+{
+       wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+                                         struct hw_perf_event *hwc, u64 config)
+{
+       config &= ~perf_ibs->cnt_mask;
+       wrmsrl(hwc->config_base, config);
+       config &= ~perf_ibs->enable_mask;
+       wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do the update.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       u64 period;
+
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+       hwc->state = 0;
+
+       perf_ibs_set_period(perf_ibs, hwc, &period);
+       set_bit(IBS_STARTED, pcpu->state);
+       perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+       perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       u64 config;
+       int stopping;
+
+       stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+       if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+               return;
+
+       rdmsrl(hwc->config_base, config);
+
+       if (stopping) {
+               set_bit(IBS_STOPPING, pcpu->state);
+               perf_ibs_disable_event(perf_ibs, hwc, config);
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       /*
+        * Clear valid bit to not count rollovers on update; rollovers
+        * are only updated in the irq handler.
+        */
+       config &= ~perf_ibs->valid_mask;
+
+       perf_ibs_event_update(perf_ibs, event, &config);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_ibs_add(struct perf_event *event, int flags)
+{
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+       if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+               return -ENOSPC;
+
+       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       pcpu->event = event;
+
+       if (flags & PERF_EF_START)
+               perf_ibs_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void perf_ibs_del(struct perf_event *event, int flags)
+{
+       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+       if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+               return;
+
+       perf_ibs_stop(event, PERF_EF_UPDATE);
+
+       pcpu->event = NULL;
+
+       perf_event_update_userpage(event);
+}
+
+static void perf_ibs_read(struct perf_event *event) { }
+
+PMU_FORMAT_ATTR(rand_en,       "config:57");
+PMU_FORMAT_ATTR(cnt_ctl,       "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+       &format_attr_rand_en.attr,
+       NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+       NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+       NULL,
+};
+
+static struct perf_ibs perf_ibs_fetch = {
+       .pmu = {
+               .task_ctx_nr    = perf_invalid_context,
+
+               .event_init     = perf_ibs_init,
+               .add            = perf_ibs_add,
+               .del            = perf_ibs_del,
+               .start          = perf_ibs_start,
+               .stop           = perf_ibs_stop,
+               .read           = perf_ibs_read,
+       },
+       .msr                    = MSR_AMD64_IBSFETCHCTL,
+       .config_mask            = IBS_FETCH_CONFIG_MASK,
+       .cnt_mask               = IBS_FETCH_MAX_CNT,
+       .enable_mask            = IBS_FETCH_ENABLE,
+       .valid_mask             = IBS_FETCH_VAL,
+       .max_period             = IBS_FETCH_MAX_CNT << 4,
+       .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
+       .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
+       .format_attrs           = ibs_fetch_format_attrs,
+
+       .get_count              = get_ibs_fetch_count,
+};
+
+static struct perf_ibs perf_ibs_op = {
+       .pmu = {
+               .task_ctx_nr    = perf_invalid_context,
+
+               .event_init     = perf_ibs_init,
+               .add            = perf_ibs_add,
+               .del            = perf_ibs_del,
+               .start          = perf_ibs_start,
+               .stop           = perf_ibs_stop,
+               .read           = perf_ibs_read,
+       },
+       .msr                    = MSR_AMD64_IBSOPCTL,
+       .config_mask            = IBS_OP_CONFIG_MASK,
+       .cnt_mask               = IBS_OP_MAX_CNT,
+       .enable_mask            = IBS_OP_ENABLE,
+       .valid_mask             = IBS_OP_VAL,
+       .max_period             = IBS_OP_MAX_CNT << 4,
+       .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
+       .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
+       .format_attrs           = ibs_op_format_attrs,
+
+       .get_count              = get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+       struct perf_event *event = pcpu->event;
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_sample_data data;
+       struct perf_raw_record raw;
+       struct pt_regs regs;
+       struct perf_ibs_data ibs_data;
+       int offset, size, check_rip, offset_max, throttle = 0;
+       unsigned int msr;
+       u64 *buf, *config, period;
+
+       if (!test_bit(IBS_STARTED, pcpu->state)) {
+               /*
+                * Catch spurious interrupts after stopping IBS: After
+                * disabling IBS there could still be incoming NMIs
+                * with samples that even have the valid bit cleared.
+                * Mark all these NMIs as handled.
+                */
+               return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+       }
+
+       msr = hwc->config_base;
+       buf = ibs_data.regs;
+       rdmsrl(msr, *buf);
+       if (!(*buf++ & perf_ibs->valid_mask))
+               return 0;
+
+       config = &ibs_data.regs[0];
+       perf_ibs_event_update(perf_ibs, event, config);
+       perf_sample_data_init(&data, 0, hwc->last_period);
+       if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+               goto out;       /* no sw counter overflow */
+
+       ibs_data.caps = ibs_caps;
+       size = 1;
+       offset = 1;
+       check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+       if (event->attr.sample_type & PERF_SAMPLE_RAW)
+               offset_max = perf_ibs->offset_max;
+       else if (check_rip)
+               offset_max = 2;
+       else
+               offset_max = 1;
+       do {
+               rdmsrl(msr + offset, *buf++);
+               size++;
+               offset = find_next_bit(perf_ibs->offset_mask,
+                                      perf_ibs->offset_max,
+                                      offset + 1);
+       } while (offset < offset_max);
+       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+               /*
+                * Read IbsBrTarget and IbsOpData4 separately
+                * depending on their availability.
+                * Can't add to offset_max as they are staggered
+                */
+               if (ibs_caps & IBS_CAPS_BRNTRGT) {
+                       rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
+                       size++;
+               }
+               if (ibs_caps & IBS_CAPS_OPDATA4) {
+                       rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
+                       size++;
+               }
+       }
+       ibs_data.size = sizeof(u64) * size;
+
+       regs = *iregs;
+       if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+               regs.flags &= ~PERF_EFLAGS_EXACT;
+       } else {
+               set_linear_ip(&regs, ibs_data.regs[1]);
+               regs.flags |= PERF_EFLAGS_EXACT;
+       }
+
+       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+               raw.size = sizeof(u32) + ibs_data.size;
+               raw.data = ibs_data.data;
+               data.raw = &raw;
+       }
+
+       throttle = perf_event_overflow(event, &data, &regs);
+out:
+       if (throttle)
+               perf_ibs_disable_event(perf_ibs, hwc, *config);
+       else
+               perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+       perf_event_update_userpage(event);
+
+       return 1;
+}
+
+static int
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+       int handled = 0;
+
+       handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+       handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       return handled;
+}
+NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+       struct cpu_perf_ibs __percpu *pcpu;
+       int ret;
+
+       pcpu = alloc_percpu(struct cpu_perf_ibs);
+       if (!pcpu)
+               return -ENOMEM;
+
+       perf_ibs->pcpu = pcpu;
+
+       /* register attributes */
+       if (perf_ibs->format_attrs[0]) {
+               memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+               perf_ibs->format_group.name     = "format";
+               perf_ibs->format_group.attrs    = perf_ibs->format_attrs;
+
+               memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+               perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
+               perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
+       }
+
+       ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+       if (ret) {
+               perf_ibs->pcpu = NULL;
+               free_percpu(pcpu);
+       }
+
+       return ret;
+}
+
+static __init int perf_event_ibs_init(void)
+{
+       struct attribute **attr = ibs_op_format_attrs;
+
+       if (!ibs_caps)
+               return -ENODEV; /* ibs not supported by the cpu */
+
+       perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+
+       if (ibs_caps & IBS_CAPS_OPCNT) {
+               perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+               *attr++ = &format_attr_cnt_ctl.attr;
+       }
+       perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
+       register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
+       pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
+
+       return 0;
+}
+
+#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
+
+static __init int perf_event_ibs_init(void) { return 0; }
+
+#endif
+
+/* IBS - apic initialization, for perf and oprofile */
+
+static __init u32 __get_ibs_caps(void)
+{
+       u32 caps;
+       unsigned int max_level;
+
+       if (!boot_cpu_has(X86_FEATURE_IBS))
+               return 0;
+
+       /* check IBS cpuid feature flags */
+       max_level = cpuid_eax(0x80000000);
+       if (max_level < IBS_CPUID_FEATURES)
+               return IBS_CAPS_DEFAULT;
+
+       caps = cpuid_eax(IBS_CPUID_FEATURES);
+       if (!(caps & IBS_CAPS_AVAIL))
+               /* cpuid flags not valid */
+               return IBS_CAPS_DEFAULT;
+
+       return caps;
+}
+
+u32 get_ibs_caps(void)
+{
+       return ibs_caps;
+}
+
+EXPORT_SYMBOL(get_ibs_caps);
+
+static inline int get_eilvt(int offset)
+{
+       return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
+}
+
+static inline int put_eilvt(int offset)
+{
+       return !setup_APIC_eilvt(offset, 0, 0, 1);
+}
+
+/*
+ * Check and reserve APIC extended interrupt LVT offset for IBS if available.
+ */
+static inline int ibs_eilvt_valid(void)
+{
+       int offset;
+       u64 val;
+       int valid = 0;
+
+       preempt_disable();
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       offset = val & IBSCTL_LVT_OFFSET_MASK;
+
+       if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
+               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               goto out;
+       }
+
+       if (!get_eilvt(offset)) {
+               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
+                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
+               goto out;
+       }
+
+       valid = 1;
+out:
+       preempt_enable();
+
+       return valid;
+}
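
A minimal user-space sketch of the IBSCTL field decoding performed above; the
mask and valid-bit positions mirror the IBSCTL_LVT_OFFSET_MASK and
IBSCTL_LVT_OFFSET_VALID definitions used here, and the sample register value
is made up for illustration:

#include <stdio.h>
#include <stdint.h>

#define IBSCTL_LVT_OFFSET_MASK   0x0FULL        /* bits 3:0 */
#define IBSCTL_LVT_OFFSET_VALID  (1ULL << 8)    /* bit 8    */

int main(void)
{
	uint64_t val = 0x101;	/* hypothetical IBSCTL value: valid bit set, offset 1 */
	int offset = (int)(val & IBSCTL_LVT_OFFSET_MASK);

	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		printf("IBS LVT offset %d not marked valid\n", offset);
	else
		printf("IBS LVT offset %d is valid\n", offset);

	return 0;
}
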
+
+static int setup_ibs_ctl(int ibs_eilvt_off)
+{
+       struct pci_dev *cpu_cfg;
+       int nodes;
+       u32 value = 0;
+
+       nodes = 0;
+       cpu_cfg = NULL;
+       do {
+               cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
+                                        PCI_DEVICE_ID_AMD_10H_NB_MISC,
+                                        cpu_cfg);
+               if (!cpu_cfg)
+                       break;
+               ++nodes;
+               pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
+                                      | IBSCTL_LVT_OFFSET_VALID);
+               pci_read_config_dword(cpu_cfg, IBSCTL, &value);
+               if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
+                       pci_dev_put(cpu_cfg);
+                       pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n",
+                                value);
+                       return -EINVAL;
+               }
+       } while (1);
+
+       if (!nodes) {
+               pr_debug("No CPU node configured for IBS\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This then updates
+ * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
+ * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
+ * uses the new offset.
+ */
+static void force_ibs_eilvt_setup(void)
+{
+       int offset;
+       int ret;
+
+       preempt_disable();
+       /* find the next free available EILVT entry, skip offset 0 */
+       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
+               if (get_eilvt(offset))
+                       break;
+       }
+       preempt_enable();
+
+       if (offset == APIC_EILVT_NR_MAX) {
+               pr_debug("No EILVT entry available\n");
+               return;
+       }
+
+       ret = setup_ibs_ctl(offset);
+       if (ret)
+               goto out;
+
+       if (!ibs_eilvt_valid())
+               goto out;
+
+       pr_info("IBS: LVT offset %d assigned\n", offset);
+
+       return;
+out:
+       preempt_disable();
+       put_eilvt(offset);
+       preempt_enable();
+       return;
+}
+
+static void ibs_eilvt_setup(void)
+{
+       /*
+        * Force LVT offset assignment for family 10h: The offsets are
+        * not assigned by the BIOS for this family, so the OS is
+        * responsible for doing it. If the OS assignment fails, fall
+        * back to the BIOS settings and try to set it up from there.
+        */
+       if (boot_cpu_data.x86 == 0x10)
+               force_ibs_eilvt_setup();
+}
+
+static inline int get_ibs_lvt_offset(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_AMD64_IBSCTL, val);
+       if (!(val & IBSCTL_LVT_OFFSET_VALID))
+               return -EINVAL;
+
+       return val & IBSCTL_LVT_OFFSET_MASK;
+}
+
+static void setup_APIC_ibs(void *dummy)
+{
+       int offset;
+
+       offset = get_ibs_lvt_offset();
+       if (offset < 0)
+               goto failed;
+
+       if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
+               return;
+failed:
+       pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
+               smp_processor_id());
+}
+
+static void clear_APIC_ibs(void *dummy)
+{
+       int offset;
+
+       offset = get_ibs_lvt_offset();
+       if (offset >= 0)
+               setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
+}
+
+#ifdef CONFIG_PM
+
+static int perf_ibs_suspend(void)
+{
+       clear_APIC_ibs(NULL);
+       return 0;
+}
+
+static void perf_ibs_resume(void)
+{
+       ibs_eilvt_setup();
+       setup_APIC_ibs(NULL);
+}
+
+static struct syscore_ops perf_ibs_syscore_ops = {
+       .resume         = perf_ibs_resume,
+       .suspend        = perf_ibs_suspend,
+};
+
+static void perf_ibs_pm_init(void)
+{
+       register_syscore_ops(&perf_ibs_syscore_ops);
+}
+
+#else
+
+static inline void perf_ibs_pm_init(void) { }
+
+#endif
+
+static int
+perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_STARTING:
+               setup_APIC_ibs(NULL);
+               break;
+       case CPU_DYING:
+               clear_APIC_ibs(NULL);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static __init int amd_ibs_init(void)
+{
+       u32 caps;
+       int ret = -EINVAL;
+
+       caps = __get_ibs_caps();
+       if (!caps)
+               return -ENODEV; /* ibs not supported by the cpu */
+
+       ibs_eilvt_setup();
+
+       if (!ibs_eilvt_valid())
+               goto out;
+
+       perf_ibs_pm_init();
+       cpu_notifier_register_begin();
+       ibs_caps = caps;
+       /* make ibs_caps visible to other cpus: */
+       smp_mb();
+       smp_call_function(setup_APIC_ibs, NULL, 1);
+       __perf_cpu_notifier(perf_ibs_cpu_notifier);
+       cpu_notifier_register_done();
+
+       ret = perf_event_ibs_init();
+out:
+       if (ret)
+               pr_err("Failed to setup IBS, %d\n", ret);
+       return ret;
+}
+
+/* Since we need the pci subsystem to init ibs we can't do this earlier: */
+device_initcall(amd_ibs_init);
diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
new file mode 100644
index 0000000..635e5eb
--- /dev/null
@@ -0,0 +1,499 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "../perf_event.h"
+#include "iommu.h"
+
+#define COUNTER_SHIFT          16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+       struct pmu pmu;
+       u8 max_banks;
+       u8 max_counters;
+       u64 cntr_assign_mask;
+       raw_spinlock_t lock;
+       const struct attribute_group *attr_groups[4];
+};
+
+#define format_group   attr_groups[0]
+#define cpumask_group  attr_groups[1]
+#define events_group   attr_groups[2]
+#define null_group     attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource,    "config:0-7");
+PMU_FORMAT_ATTR(devid,      "config:8-23");
+PMU_FORMAT_ATTR(pasid,      "config:24-39");
+PMU_FORMAT_ATTR(domid,      "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+       &format_attr_csource.attr,
+       &format_attr_devid.attr,
+       &format_attr_pasid.attr,
+       &format_attr_domid.attr,
+       &format_attr_devid_mask.attr,
+       &format_attr_pasid_mask.attr,
+       &format_attr_domid_mask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+       .name = "format",
+       .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+       struct kobj_attribute attr;
+       const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+                               struct kobj_attribute *attr, char *buf)
+{
+       struct amd_iommu_event_desc *event =
+               container_of(attr, struct amd_iommu_event_desc, attr);
+       return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)                    \
+{                                                              \
+       .attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),  \
+       .event = _event,                                        \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+       AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
+       AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
+       AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
+       AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
+       AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
+       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
+       AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
+       AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
+       AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
+       AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
+       AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
+       AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
+       AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
+       AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
+       { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+                                  struct device_attribute *attr,
+                                  char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+       .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+       unsigned long flags;
+       int shift, bank, cntr, retval;
+       int max_banks = perf_iommu->max_banks;
+       int max_cntrs = perf_iommu->max_counters;
+
+       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+       for (bank = 0, shift = 0; bank < max_banks; bank++) {
+               for (cntr = 0; cntr < max_cntrs; cntr++) {
+                       shift = bank + (bank*3) + cntr;
+                       if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+                               continue;
+                       } else {
+                               perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+                               retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+                               goto out;
+                       }
+               }
+       }
+       retval = -ENOSPC;
+out:
+       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+       return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+                                       u8 bank, u8 cntr)
+{
+       unsigned long flags;
+       int max_banks, max_cntrs;
+       int shift = 0;
+
+       max_banks = perf_iommu->max_banks;
+       max_cntrs = perf_iommu->max_counters;
+
+       if ((bank > max_banks) || (cntr > max_cntrs))
+               return -EINVAL;
+
+       shift = bank + cntr + (bank*3);
+
+       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+       perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+       return 0;
+}
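
The bank/counter bookkeeping above packs each bank into four bits of a 64-bit
assignment mask (shift = bank + bank*3 + cntr, i.e. bank*4 + cntr) and hands
the claimed pair back as (bank << 8) | cntr in extra_reg.reg. A stand-alone
sketch of that encoding, using hypothetical bank/counter numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t assign_mask = 0;
	int bank = 2, cntr = 3;

	int shift = bank + (bank * 3) + cntr;		/* == bank*4 + cntr == 11 */
	assign_mask |= 1ULL << shift;			/* claim bank 2, counter 3 */

	uint16_t cookie = (uint16_t)(bank << 8) | (uint8_t)cntr;

	printf("shift=%d mask=%#llx bank=%u cntr=%u\n", shift,
	       (unsigned long long)assign_mask,
	       (unsigned)(cookie >> 8), (unsigned)(cookie & 0xff));

	assign_mask &= ~(1ULL << shift);		/* release it again */
	printf("mask after release=%#llx\n", (unsigned long long)assign_mask);
	return 0;
}
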
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct perf_amd_iommu *perf_iommu;
+       u64 config, config1;
+
+       /* check that the event attr type matches this PMU (PMU enumeration) */
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /*
+        * IOMMU counters are shared across all cores.
+        * Therefore, neither per-process mode nor
+        * event sampling mode is supported.
+        */
+       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+               return -EINVAL;
+
+       /* IOMMU counters do not have usr/os/guest/host bits */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+           event->attr.exclude_host || event->attr.exclude_guest)
+               return -EINVAL;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       perf_iommu = &__perf_iommu;
+
+       if (event->pmu != &perf_iommu->pmu)
+               return -ENOENT;
+
+       if (perf_iommu) {
+               config = event->attr.config;
+               config1 = event->attr.config1;
+       } else {
+               return -EINVAL;
+       }
+
+       /* integrate with iommu base devid (0000), assume one iommu */
+       perf_iommu->max_banks =
+               amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+       perf_iommu->max_counters =
+               amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+       if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+               return -EINVAL;
+
+       /* update the hw_perf_event struct with the iommu config data */
+       hwc->config = config;
+       hwc->extra_reg.config = config1;
+
+       return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+       u8 csource = _GET_CSOURCE(ev);
+       u16 devid = _GET_DEVID(ev);
+       u64 reg = 0ULL;
+
+       reg = csource;
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev),
+                        IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+       reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev),
+                        IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+       reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev),
+                        IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+       reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+       if (reg)
+               reg |= (1UL << 31);
+       amd_iommu_pc_get_set_reg_val(devid,
+                       _GET_BANK(ev), _GET_CNTR(ev),
+                        IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+       u64 reg = 0ULL;
+
+       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                       _GET_BANK(event), _GET_CNTR(event),
+                       IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       pr_debug("perf: amd_iommu:perf_iommu_start\n");
+       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+               return;
+
+       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+       hwc->state = 0;
+
+       if (flags & PERF_EF_RELOAD) {
+               u64 prev_raw_count =  local64_read(&hwc->prev_count);
+               amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                               _GET_BANK(event), _GET_CNTR(event),
+                               IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+       }
+
+       perf_iommu_enable_event(event);
+       perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+       u64 count = 0ULL;
+       u64 prev_raw_count = 0ULL;
+       u64 delta = 0ULL;
+       struct hw_perf_event *hwc = &event->hw;
+       pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+                               _GET_BANK(event), _GET_CNTR(event),
+                               IOMMU_PC_COUNTER_REG, &count, false);
+
+       /* IOMMU pc counter register is only 48 bits */
+       count &= 0xFFFFFFFFFFFFULL;
+
+       prev_raw_count =  local64_read(&hwc->prev_count);
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       count) != prev_raw_count)
+               return;
+
+       /* Handle 48-bit counter overflow */
+       delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+       delta >>= COUNTER_SHIFT;
+       local64_add(delta, &event->count);
+
+}
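
The 48-bit overflow handling above works by shifting both samples into the
top 48 bits so the unsigned subtraction wraps exactly like the hardware
counter does. A small sketch of that arithmetic, using made-up counter
values straddling the wrap point:

#include <stdio.h>
#include <stdint.h>

#define COUNTER_SHIFT 16	/* 64 - 48, as above */

static uint64_t delta48(uint64_t prev, uint64_t now)
{
	/* shift into the top 48 bits, subtract, shift back down */
	uint64_t delta = (now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
	return delta >> COUNTER_SHIFT;
}

int main(void)
{
	uint64_t prev = 0xFFFFFFFFFFF0ULL;	/* just below the 48-bit limit */
	uint64_t now  = 0x000000000010ULL;	/* the counter has wrapped     */

	/* prints 32: 16 ticks up to the wrap plus 16 ticks after it */
	printf("delta = %llu\n", (unsigned long long)delta48(prev, now));
	return 0;
}
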
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 config;
+
+       pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       perf_iommu_disable_event(event);
+       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+       hwc->state |= PERF_HES_STOPPED;
+
+       if (hwc->state & PERF_HES_UPTODATE)
+               return;
+
+       config = hwc->config;
+       perf_iommu_read(event);
+       hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+       int retval;
+       struct perf_amd_iommu *perf_iommu =
+                       container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+       pr_debug("perf: amd_iommu:perf_iommu_add\n");
+       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       /* request an iommu bank/counter */
+       retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+       if (retval != -ENOSPC)
+               event->hw.extra_reg.reg = (u16)retval;
+       else
+               return retval;
+
+       if (flags & PERF_EF_START)
+               perf_iommu_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+       struct perf_amd_iommu *perf_iommu =
+                       container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+       pr_debug("perf: amd_iommu:perf_iommu_del\n");
+       perf_iommu_stop(event, PERF_EF_UPDATE);
+
+       /* clear the assigned iommu bank/counter */
+       clear_avail_iommu_bnk_cntr(perf_iommu,
+                                    _GET_BANK(event),
+                                    _GET_CNTR(event));
+
+       perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+       struct attribute **attrs;
+       struct attribute_group *attr_group;
+       int i = 0, j;
+
+       while (amd_iommu_v2_event_descs[i].attr.attr.name)
+               i++;
+
+       attr_group = kzalloc(sizeof(struct attribute *)
+               * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+       if (!attr_group)
+               return -ENOMEM;
+
+       attrs = (struct attribute **)(attr_group + 1);
+       for (j = 0; j < i; j++)
+               attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+       attr_group->name = "events";
+       attr_group->attrs = attrs;
+       perf_iommu->events_group = attr_group;
+
+       return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+       if (__perf_iommu.events_group != NULL) {
+               kfree(__perf_iommu.events_group);
+               __perf_iommu.events_group = NULL;
+       }
+}
+
+static __init int _init_perf_amd_iommu(
+       struct perf_amd_iommu *perf_iommu, char *name)
+{
+       int ret;
+
+       raw_spin_lock_init(&perf_iommu->lock);
+
+       /* Init format attributes */
+       perf_iommu->format_group = &amd_iommu_format_group;
+
+       /* Init cpumask attributes to only core 0 */
+       cpumask_set_cpu(0, &iommu_cpumask);
+       perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+       /* Init events attributes */
+       if (_init_events_attrs(perf_iommu) != 0)
+               pr_err("perf: amd_iommu: Only raw events are supported.\n");
+
+       /* Init null attributes */
+       perf_iommu->null_group = NULL;
+       perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+       ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+       if (ret) {
+               pr_err("perf: amd_iommu: Failed to initialize.\n");
+               amd_iommu_pc_exit();
+       } else {
+               pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+                       amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+                       amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+       }
+
+       return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+       .pmu = {
+               .event_init     = perf_iommu_event_init,
+               .add            = perf_iommu_add,
+               .del            = perf_iommu_del,
+               .start          = perf_iommu_start,
+               .stop           = perf_iommu_stop,
+               .read           = perf_iommu_read,
+       },
+       .max_banks              = 0x00,
+       .max_counters           = 0x00,
+       .cntr_assign_mask       = 0ULL,
+       .format_group           = NULL,
+       .cpumask_group          = NULL,
+       .events_group           = NULL,
+       .null_group             = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+       /* Make sure the IOMMU PC resource is available */
+       if (!amd_iommu_pc_supported())
+               return -ENODEV;
+
+       _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+       return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/events/amd/iommu.h b/arch/x86/events/amd/iommu.h
new file mode 100644
index 0000000..845d173
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG                   0x00
+#define IOMMU_PC_COUNTER_SRC_REG               0x08
+#define IOMMU_PC_PASID_MATCH_REG               0x10
+#define IOMMU_PC_DOMID_MATCH_REG               0x18
+#define IOMMU_PC_DEVID_MATCH_REG               0x20
+#define IOMMU_PC_COUNTER_REPORT_REG            0x28
+
+/* maximum specified bank/counters */
+#define PC_MAX_SPEC_BNKS                       64
+#define PC_MAX_SPEC_CNTRS                      16
+
+/* iommu pc reg masks */
+#define IOMMU_BASE_DEVID                       0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+                       u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
new file mode 100644
index 0000000..3db9569
--- /dev/null
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+
+#include <asm/cpufeature.h>
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+
+#define NUM_COUNTERS_NB                4
+#define NUM_COUNTERS_L2                4
+#define MAX_COUNTERS           NUM_COUNTERS_NB
+
+#define RDPMC_BASE_NB          6
+#define RDPMC_BASE_L2          10
+
+#define COUNTER_SHIFT          16
+
+struct amd_uncore {
+       int id;
+       int refcnt;
+       int cpu;
+       int num_counters;
+       int rdpmc_base;
+       u32 msr_base;
+       cpumask_t *active_mask;
+       struct pmu *pmu;
+       struct perf_event *events[MAX_COUNTERS];
+       struct amd_uncore *free_when_cpu_online;
+};
+
+static struct amd_uncore * __percpu *amd_uncore_nb;
+static struct amd_uncore * __percpu *amd_uncore_l2;
+
+static struct pmu amd_nb_pmu;
+static struct pmu amd_l2_pmu;
+
+static cpumask_t amd_nb_active_mask;
+static cpumask_t amd_l2_active_mask;
+
+static bool is_nb_event(struct perf_event *event)
+{
+       return event->pmu->type == amd_nb_pmu.type;
+}
+
+static bool is_l2_event(struct perf_event *event)
+{
+       return event->pmu->type == amd_l2_pmu.type;
+}
+
+static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
+{
+       if (is_nb_event(event) && amd_uncore_nb)
+               return *per_cpu_ptr(amd_uncore_nb, event->cpu);
+       else if (is_l2_event(event) && amd_uncore_l2)
+               return *per_cpu_ptr(amd_uncore_l2, event->cpu);
+
+       return NULL;
+}
+
+static void amd_uncore_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev, new;
+       s64 delta;
+
+       /*
+        * since we do not enable counter overflow interrupts,
+        * we do not have to worry about prev_count changing on us
+        */
+
+       prev = local64_read(&hwc->prev_count);
+       rdpmcl(hwc->event_base_rdpmc, new);
+       local64_set(&hwc->prev_count, new);
+       delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
+       delta >>= COUNTER_SHIFT;
+       local64_add(delta, &event->count);
+}
+
+static void amd_uncore_start(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (flags & PERF_EF_RELOAD)
+               wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+       hwc->state = 0;
+       wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
+       perf_event_update_userpage(event);
+}
+
+static void amd_uncore_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+       hwc->state |= PERF_HES_STOPPED;
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               amd_uncore_read(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int amd_uncore_add(struct perf_event *event, int flags)
+{
+       int i;
+       struct amd_uncore *uncore = event_to_amd_uncore(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* are we already assigned? */
+       if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
+               goto out;
+
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (uncore->events[i] == event) {
+                       hwc->idx = i;
+                       goto out;
+               }
+       }
+
+       /* if not, take the first available counter */
+       hwc->idx = -1;
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
+                       hwc->idx = i;
+                       break;
+               }
+       }
+
+out:
+       if (hwc->idx == -1)
+               return -EBUSY;
+
+       hwc->config_base = uncore->msr_base + (2 * hwc->idx);
+       hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
+       hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       if (flags & PERF_EF_START)
+               amd_uncore_start(event, PERF_EF_RELOAD);
+
+       return 0;
+}
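
amd_uncore_add() claims a free counter slot with a lock-free cmpxchg: the
first caller to swap a NULL entry to its event pointer owns that counter,
and everyone else moves on to the next slot. A user-space sketch of the same
pattern, with C11 atomics standing in for the kernel's cmpxchg():

#include <stdio.h>
#include <stdatomic.h>

#define NUM_COUNTERS 4

static _Atomic(void *) events[NUM_COUNTERS];

static int claim_counter(void *event)
{
	for (int i = 0; i < NUM_COUNTERS; i++) {
		void *expected = NULL;

		/* succeeds only if slot i was still free */
		if (atomic_compare_exchange_strong(&events[i], &expected, event))
			return i;
	}
	return -1;	/* all counters busy */
}

int main(void)
{
	int ev1, ev2;

	printf("first event got counter %d\n", claim_counter(&ev1));	/* 0 */
	printf("second event got counter %d\n", claim_counter(&ev2));	/* 1 */
	return 0;
}
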
+
+static void amd_uncore_del(struct perf_event *event, int flags)
+{
+       int i;
+       struct amd_uncore *uncore = event_to_amd_uncore(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       amd_uncore_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < uncore->num_counters; i++) {
+               if (cmpxchg(&uncore->events[i], event, NULL) == event)
+                       break;
+       }
+
+       hwc->idx = -1;
+}
+
+static int amd_uncore_event_init(struct perf_event *event)
+{
+       struct amd_uncore *uncore;
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /*
+        * NB and L2 counters (MSRs) are shared across all cores that share the
+        * same NB / L2 cache. Interrupts can be directed to a single target
+        * core, however, event counts generated by processes running on other
+        * cores cannot be masked out. So we do not support sampling and
+        * per-thread events.
+        */
+       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+               return -EINVAL;
+
+       /* NB and L2 counters do not have usr/os/guest/host bits */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+           event->attr.exclude_host || event->attr.exclude_guest)
+               return -EINVAL;
+
+       /* and we do not enable counter overflow interrupts */
+       hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
+       hwc->idx = -1;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       uncore = event_to_amd_uncore(event);
+       if (!uncore)
+               return -ENODEV;
+
+       /*
+        * Since requests can come in on any of the shared cores, remap
+        * them all to a single common CPU.
+        */
+       event->cpu = uncore->cpu;
+
+       return 0;
+}
+
+static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
+                                           struct device_attribute *attr,
+                                           char *buf)
+{
+       cpumask_t *active_mask;
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       if (pmu->type == amd_nb_pmu.type)
+               active_mask = &amd_nb_active_mask;
+       else if (pmu->type == amd_l2_pmu.type)
+               active_mask = &amd_l2_active_mask;
+       else
+               return 0;
+
+       return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *amd_uncore_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_uncore_attr_group = {
+       .attrs = amd_uncore_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15");
+
+static struct attribute *amd_uncore_format_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       NULL,
+};
+
+static struct attribute_group amd_uncore_format_group = {
+       .name = "format",
+       .attrs = amd_uncore_format_attr,
+};
+
+static const struct attribute_group *amd_uncore_attr_groups[] = {
+       &amd_uncore_attr_group,
+       &amd_uncore_format_group,
+       NULL,
+};
+
+static struct pmu amd_nb_pmu = {
+       .attr_groups    = amd_uncore_attr_groups,
+       .name           = "amd_nb",
+       .event_init     = amd_uncore_event_init,
+       .add            = amd_uncore_add,
+       .del            = amd_uncore_del,
+       .start          = amd_uncore_start,
+       .stop           = amd_uncore_stop,
+       .read           = amd_uncore_read,
+};
+
+static struct pmu amd_l2_pmu = {
+       .attr_groups    = amd_uncore_attr_groups,
+       .name           = "amd_l2",
+       .event_init     = amd_uncore_event_init,
+       .add            = amd_uncore_add,
+       .del            = amd_uncore_del,
+       .start          = amd_uncore_start,
+       .stop           = amd_uncore_stop,
+       .read           = amd_uncore_read,
+};
+
+static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
+{
+       return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
+                       cpu_to_node(cpu));
+}
+
+static int amd_uncore_cpu_up_prepare(unsigned int cpu)
+{
+       struct amd_uncore *uncore_nb = NULL, *uncore_l2;
+
+       if (amd_uncore_nb) {
+               uncore_nb = amd_uncore_alloc(cpu);
+               if (!uncore_nb)
+                       goto fail;
+               uncore_nb->cpu = cpu;
+               uncore_nb->num_counters = NUM_COUNTERS_NB;
+               uncore_nb->rdpmc_base = RDPMC_BASE_NB;
+               uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
+               uncore_nb->active_mask = &amd_nb_active_mask;
+               uncore_nb->pmu = &amd_nb_pmu;
+               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
+       }
+
+       if (amd_uncore_l2) {
+               uncore_l2 = amd_uncore_alloc(cpu);
+               if (!uncore_l2)
+                       goto fail;
+               uncore_l2->cpu = cpu;
+               uncore_l2->num_counters = NUM_COUNTERS_L2;
+               uncore_l2->rdpmc_base = RDPMC_BASE_L2;
+               uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
+               uncore_l2->active_mask = &amd_l2_active_mask;
+               uncore_l2->pmu = &amd_l2_pmu;
+               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
+       }
+
+       return 0;
+
+fail:
+       if (amd_uncore_nb)
+               *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+       kfree(uncore_nb);
+       return -ENOMEM;
+}
+
+static struct amd_uncore *
+amd_uncore_find_online_sibling(struct amd_uncore *this,
+                              struct amd_uncore * __percpu *uncores)
+{
+       unsigned int cpu;
+       struct amd_uncore *that;
+
+       for_each_online_cpu(cpu) {
+               that = *per_cpu_ptr(uncores, cpu);
+
+               if (!that)
+                       continue;
+
+               if (this == that)
+                       continue;
+
+               if (this->id == that->id) {
+                       that->free_when_cpu_online = this;
+                       this = that;
+                       break;
+               }
+       }
+
+       this->refcnt++;
+       return this;
+}
+
+static void amd_uncore_cpu_starting(unsigned int cpu)
+{
+       unsigned int eax, ebx, ecx, edx;
+       struct amd_uncore *uncore;
+
+       if (amd_uncore_nb) {
+               uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
+               cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+               uncore->id = ecx & 0xff;
+
+               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
+               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+       }
+
+       if (amd_uncore_l2) {
+               unsigned int apicid = cpu_data(cpu).apicid;
+               unsigned int nshared;
+
+               uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
+               cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
+               nshared = ((eax >> 14) & 0xfff) + 1;
+               uncore->id = apicid - (apicid % nshared);
+
+               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
+               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+       }
+}
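
The L2 uncore id above comes from CPUID leaf 0x8000001d, index 2: as the code
reads it, EAX[25:14] holds the number of cores sharing the L2 minus one, and
the apicid is rounded down to that sharing group so all siblings compute the
same id. A sketch with made-up CPUID and apicid values:

#include <stdio.h>

int main(void)
{
	unsigned int eax = 1u << 14;	/* pretend two cores share the L2 */
	unsigned int apicid = 5;

	unsigned int nshared = ((eax >> 14) & 0xfff) + 1;
	unsigned int id = apicid - (apicid % nshared);

	printf("nshared=%u l2_id=%u\n", nshared, id);	/* nshared=2, l2_id=4 */
	return 0;
}
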
+
+static void uncore_online(unsigned int cpu,
+                         struct amd_uncore * __percpu *uncores)
+{
+       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+       kfree(uncore->free_when_cpu_online);
+       uncore->free_when_cpu_online = NULL;
+
+       if (cpu == uncore->cpu)
+               cpumask_set_cpu(cpu, uncore->active_mask);
+}
+
+static void amd_uncore_cpu_online(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_online(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_online(cpu, amd_uncore_l2);
+}
+
+static void uncore_down_prepare(unsigned int cpu,
+                               struct amd_uncore * __percpu *uncores)
+{
+       unsigned int i;
+       struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
+
+       if (this->cpu != cpu)
+               return;
+
+       /* this cpu is going down, migrate to a shared sibling if possible */
+       for_each_online_cpu(i) {
+               struct amd_uncore *that = *per_cpu_ptr(uncores, i);
+
+               if (cpu == i)
+                       continue;
+
+               if (this == that) {
+                       perf_pmu_migrate_context(this->pmu, cpu, i);
+                       cpumask_clear_cpu(cpu, that->active_mask);
+                       cpumask_set_cpu(i, that->active_mask);
+                       that->cpu = i;
+                       break;
+               }
+       }
+}
+
+static void amd_uncore_cpu_down_prepare(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_down_prepare(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_down_prepare(cpu, amd_uncore_l2);
+}
+
+static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
+{
+       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
+
+       if (cpu == uncore->cpu)
+               cpumask_clear_cpu(cpu, uncore->active_mask);
+
+       if (!--uncore->refcnt)
+               kfree(uncore);
+       *per_cpu_ptr(uncores, cpu) = NULL;
+}
+
+static void amd_uncore_cpu_dead(unsigned int cpu)
+{
+       if (amd_uncore_nb)
+               uncore_dead(cpu, amd_uncore_nb);
+
+       if (amd_uncore_l2)
+               uncore_dead(cpu, amd_uncore_l2);
+}
+
+static int
+amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
+                       void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               if (amd_uncore_cpu_up_prepare(cpu))
+                       return notifier_from_errno(-ENOMEM);
+               break;
+
+       case CPU_STARTING:
+               amd_uncore_cpu_starting(cpu);
+               break;
+
+       case CPU_ONLINE:
+               amd_uncore_cpu_online(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               amd_uncore_cpu_down_prepare(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DEAD:
+               amd_uncore_cpu_dead(cpu);
+               break;
+
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block amd_uncore_cpu_notifier_block = {
+       .notifier_call  = amd_uncore_cpu_notifier,
+       .priority       = CPU_PRI_PERF + 1,
+};
+
+static void __init init_cpu_already_online(void *dummy)
+{
+       unsigned int cpu = smp_processor_id();
+
+       amd_uncore_cpu_starting(cpu);
+       amd_uncore_cpu_online(cpu);
+}
+
+static void cleanup_cpu_online(void *dummy)
+{
+       unsigned int cpu = smp_processor_id();
+
+       amd_uncore_cpu_dead(cpu);
+}
+
+static int __init amd_uncore_init(void)
+{
+       unsigned int cpu, cpu2;
+       int ret = -ENODEV;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+               goto fail_nodev;
+
+       if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
+               goto fail_nodev;
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
+               amd_uncore_nb = alloc_percpu(struct amd_uncore *);
+               if (!amd_uncore_nb) {
+                       ret = -ENOMEM;
+                       goto fail_nb;
+               }
+               ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+               if (ret)
+                       goto fail_nb;
+
+               pr_info("perf: AMD NB counters detected\n");
+               ret = 0;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
+               amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
+               if (!amd_uncore_l2) {
+                       ret = -ENOMEM;
+                       goto fail_l2;
+               }
+               ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+               if (ret)
+                       goto fail_l2;
+
+               pr_info("perf: AMD L2I counters detected\n");
+               ret = 0;
+       }
+
+       if (ret)
+               goto fail_nodev;
+
+       cpu_notifier_register_begin();
+
+       /* init cpus already online before registering for hotplug notifier */
+       for_each_online_cpu(cpu) {
+               ret = amd_uncore_cpu_up_prepare(cpu);
+               if (ret)
+                       goto fail_online;
+               smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
+       }
+
+       __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
+       cpu_notifier_register_done();
+
+       return 0;
+
+
+fail_online:
+       for_each_online_cpu(cpu2) {
+               if (cpu2 == cpu)
+                       break;
+               smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
+       }
+       cpu_notifier_register_done();
+
+       /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
+       amd_uncore_nb = amd_uncore_l2 = NULL;
+
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
+               perf_pmu_unregister(&amd_l2_pmu);
+fail_l2:
+       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
+               perf_pmu_unregister(&amd_nb_pmu);
+       if (amd_uncore_l2)
+               free_percpu(amd_uncore_l2);
+fail_nb:
+       if (amd_uncore_nb)
+               free_percpu(amd_uncore_nb);
+
+fail_nodev:
+       return ret;
+}
+device_initcall(amd_uncore_init);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
new file mode 100644
index 0000000..5e830d0
--- /dev/null
@@ -0,0 +1,2442 @@
+/*
+ * Performance events x86 architecture code
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+#include <linux/capability.h>
+#include <linux/notifier.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+
+#include <asm/apic.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/alternative.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
+
+#include "perf_event.h"
+
+struct x86_pmu x86_pmu __read_mostly;
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+       .enabled = 1,
+};
+
+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
+u64 __read_mostly hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+/*
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
+ * Returns the delta events processed.
+ */
+u64 x86_perf_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int shift = 64 - x86_pmu.cntval_bits;
+       u64 prev_raw_count, new_raw_count;
+       int idx = hwc->idx;
+       s64 delta;
+
+       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+               return 0;
+
+       /*
+        * Careful: an NMI might modify the previous event value.
+        *
+        * Our tactic to handle this is to first atomically read and
+        * exchange a new raw count - then add that new-prev delta
+        * count to the generic event atomically:
+        */
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                                       new_raw_count) != prev_raw_count)
+               goto again;
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
+
+       return new_raw_count;
+}
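
The update loop above tolerates an NMI racing with it: it keeps re-reading
prev_count until the cmpxchg publishes the new raw count atomically, and only
then computes the width-adjusted delta. A user-space sketch of that
read-and-exchange pattern, with C11 atomics and a fake counter standing in
for local64_cmpxchg() and rdpmcl():

#include <stdio.h>
#include <stdint.h>
#include <stdatomic.h>

static _Atomic uint64_t prev_count;

static uint64_t read_counter(void)
{
	static uint64_t fake = 100;
	return fake += 7;		/* pretend the counter keeps ticking */
}

static uint64_t update(int shift)
{
	uint64_t prev, cur;
	int64_t delta;

again:
	prev = atomic_load(&prev_count);
	cur = read_counter();
	if (!atomic_compare_exchange_strong(&prev_count, &prev, cur))
		goto again;

	/* shift up to the counter width so the subtraction wraps correctly */
	delta = (cur << shift) - (prev << shift);
	delta >>= shift;

	return (uint64_t)delta;
}

int main(void)
{
	/* 64 - 48, i.e. a 48-bit wide counter */
	printf("delta = %llu\n", (unsigned long long)update(16));
	return 0;
}
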
+
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+       struct extra_reg *er;
+
+       reg = &event->hw.extra_reg;
+
+       if (!x86_pmu.extra_regs)
+               return 0;
+
+       for (er = x86_pmu.extra_regs; er->msr; er++) {
+               if (er->event != (config & er->config_mask))
+                       continue;
+               if (event->attr.config1 & ~er->valid_mask)
+                       return -EINVAL;
+               /* Check if the extra msrs can be safely accessed */
+               if (!er->extra_msr_access)
+                       return -ENXIO;
+
+               reg->idx = er->idx;
+               reg->config = event->attr.config1;
+               reg->reg = er->msr;
+               break;
+       }
+       return 0;
+}
+
+static atomic_t active_events;
+static atomic_t pmc_refcount;
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static bool reserve_pmc_hardware(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
+                       goto perfctr_fail;
+       }
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
+                       goto eventsel_fail;
+       }
+
+       return true;
+
+eventsel_fail:
+       for (i--; i >= 0; i--)
+               release_evntsel_nmi(x86_pmu_config_addr(i));
+
+       i = x86_pmu.num_counters;
+
+perfctr_fail:
+       for (i--; i >= 0; i--)
+               release_perfctr_nmi(x86_pmu_event_addr(i));
+
+       return false;
+}
+
+static void release_pmc_hardware(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               release_perfctr_nmi(x86_pmu_event_addr(i));
+               release_evntsel_nmi(x86_pmu_config_addr(i));
+       }
+}
+
+#else
+
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
+
+#endif
+
+static bool check_hw_exists(void)
+{
+       u64 val, val_fail, val_new = ~0;
+       int i, reg, reg_fail, ret = 0;
+       int bios_fail = 0;
+       int reg_safe = -1;
+
+       /*
+        * Check to see if the BIOS enabled any of the counters; if so,
+        * complain and bail.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu_config_addr(i);
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+                       bios_fail = 1;
+                       val_fail = val;
+                       reg_fail = reg;
+               } else {
+                       reg_safe = i;
+               }
+       }
+
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+                       if (val & (0x03 << i*4)) {
+                               bios_fail = 1;
+                               val_fail = val;
+                               reg_fail = reg;
+                       }
+               }
+       }
+
+       /*
+        * If all the counters are enabled, the below test will always
+        * fail.  The tools will also become useless in this scenario.
+        * Just fail and disable the hardware counters.
+        */
+
+       if (reg_safe == -1) {
+               reg = reg_safe;
+               goto msr_fail;
+       }
+
+       /*
+        * Read the current value, change it and read it back to see if it
+        * matches; this is needed to detect certain hardware emulators
+        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+        */
+       reg = x86_pmu_event_addr(reg_safe);
+       if (rdmsrl_safe(reg, &val))
+               goto msr_fail;
+       val ^= 0xffffUL;
+       ret = wrmsrl_safe(reg, val);
+       ret |= rdmsrl_safe(reg, &val_new);
+       if (ret || val != val_new)
+               goto msr_fail;
+
+       /*
+        * We still allow the PMU driver to operate:
+        */
+       if (bios_fail) {
+               pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
+               pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
+                             reg_fail, val_fail);
+       }
+
+       return true;
+
+msr_fail:
+       pr_cont("Broken PMU hardware detected, using software events only.\n");
+       pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+               boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+               reg, val_new);
+
+       return false;
+}
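
The emulator check above is a write/read-back probe: flip some bits in one
counter MSR and verify the new value sticks, since an emulator that silently
drops the write (and keeps returning zeros) will fail the comparison. A
sketch of the idea with an ordinary variable standing in for the MSR:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static uint64_t fake_msr;
static bool drop_writes;	/* emulate a hypervisor that ignores MSR writes */

static void wrmsr_fake(uint64_t val)	{ if (!drop_writes) fake_msr = val; }
static uint64_t rdmsr_fake(void)	{ return fake_msr; }

static bool counter_works(void)
{
	uint64_t val = rdmsr_fake() ^ 0xffffUL;		/* change the value */

	wrmsr_fake(val);
	return rdmsr_fake() == val;			/* did it stick? */
}

int main(void)
{
	drop_writes = false;
	printf("real counter:            %s\n", counter_works() ? "ok" : "broken");

	drop_writes = true;
	printf("write-dropping emulator: %s\n", counter_works() ? "ok" : "broken");
	return 0;
}
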
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+       x86_release_hardware();
+       atomic_dec(&active_events);
+}
+
+void hw_perf_lbr_event_destroy(struct perf_event *event)
+{
+       hw_perf_event_destroy(event);
+
+       /* undo the lbr/bts event accounting */
+       x86_del_exclusive(x86_lbr_exclusive_lbr);
+}
+
+static inline int x86_pmu_initialized(void)
+{
+       return x86_pmu.handle_irq != NULL;
+}
+
+static inline int
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       unsigned int cache_type, cache_op, cache_result;
+       u64 config, val;
+
+       config = attr->config;
+
+       cache_type = (config >>  0) & 0xff;
+       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+               return -EINVAL;
+
+       cache_op = (config >>  8) & 0xff;
+       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+               return -EINVAL;
+
+       cache_result = (config >> 16) & 0xff;
+       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       val = hw_cache_event_ids[cache_type][cache_op][cache_result];
+
+       if (val == 0)
+               return -ENOENT;
+
+       if (val == -1)
+               return -EINVAL;
+
+       hwc->config |= val;
+       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+       return x86_pmu_extra_regs(val, event);
+}
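
set_ext_hw_attr() treats the PERF_TYPE_HW_CACHE config as three packed bytes,
cache type, operation and operation result, each range-checked before it is
used to index the event tables. A minimal sketch of that decoding, using an
arbitrary example value rather than a real event encoding:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t config = (1ULL << 16) | (0ULL << 8) | 0ULL;	/* result=1, op=0, type=0 */

	unsigned int cache_type   = (config >>  0) & 0xff;
	unsigned int cache_op     = (config >>  8) & 0xff;
	unsigned int cache_result = (config >> 16) & 0xff;

	printf("type=%u op=%u result=%u\n", cache_type, cache_op, cache_result);
	return 0;
}
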
+
+int x86_reserve_hardware(void)
+{
+       int err = 0;
+
+       if (!atomic_inc_not_zero(&pmc_refcount)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&pmc_refcount) == 0) {
+                       if (!reserve_pmc_hardware())
+                               err = -EBUSY;
+                       else
+                               reserve_ds_buffers();
+               }
+               if (!err)
+                       atomic_inc(&pmc_refcount);
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+
+       return err;
+}
+
+void x86_release_hardware(void)
+{
+       if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
+               release_pmc_hardware();
+               release_ds_buffers();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
+
+/*
+ * Check whether we can create an event of a certain type (i.e. that no
+ * conflicting events are present).
+ */
+int x86_add_exclusive(unsigned int what)
+{
+       int i;
+
+       if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
+               mutex_lock(&pmc_reserve_mutex);
+               for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
+                       if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
+                               goto fail_unlock;
+               }
+               atomic_inc(&x86_pmu.lbr_exclusive[what]);
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+
+       atomic_inc(&active_events);
+       return 0;
+
+fail_unlock:
+       mutex_unlock(&pmc_reserve_mutex);
+       return -EBUSY;
+}
+
+void x86_del_exclusive(unsigned int what)
+{
+       atomic_dec(&x86_pmu.lbr_exclusive[what]);
+       atomic_dec(&active_events);
+}
+
+int x86_setup_perfctr(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 config;
+
+       if (!is_sampling_event(event)) {
+               hwc->sample_period = x86_pmu.max_period;
+               hwc->last_period = hwc->sample_period;
+               local64_set(&hwc->period_left, hwc->sample_period);
+       }
+
+       if (attr->type == PERF_TYPE_RAW)
+               return x86_pmu_extra_regs(event->attr.config, event);
+
+       if (attr->type == PERF_TYPE_HW_CACHE)
+               return set_ext_hw_attr(hwc, event);
+
+       if (attr->config >= x86_pmu.max_events)
+               return -EINVAL;
+
+       /*
+        * The generic map:
+        */
+       config = x86_pmu.event_map(attr->config);
+
+       if (config == 0)
+               return -ENOENT;
+
+       if (config == -1LL)
+               return -EINVAL;
+
+       /*
+        * Branch tracing:
+        */
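+       /*
+        * Sampling every single branch instruction (a fixed period of 1,
+        * i.e. !freq) is what selects BTS below.
+        */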
+       if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+           !attr->freq && hwc->sample_period == 1) {
+               /* BTS is not supported by this architecture. */
+               if (!x86_pmu.bts_active)
+                       return -EOPNOTSUPP;
+
+               /* BTS is currently only allowed for user-mode. */
+               if (!attr->exclude_kernel)
+                       return -EOPNOTSUPP;
+
+               /* disallow bts if conflicting events are present */
+               if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+                       return -EBUSY;
+
+               event->destroy = hw_perf_lbr_event_destroy;
+       }
+
+       hwc->config |= config;
+
+       return 0;
+}
+
+/*
+ * Check that branch_sample_type is compatible with the settings needed
+ * for precise_ip > 1, which implies using the LBR to capture ALL taken
+ * branches at the privilege levels of the measurement.
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+       u64 m = event->attr.branch_sample_type;
+       u64 b = 0;
+
+       /* must capture all branches */
+       if (!(m & PERF_SAMPLE_BRANCH_ANY))
+               return 0;
+
+       m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+       if (!event->attr.exclude_user)
+               b |= PERF_SAMPLE_BRANCH_USER;
+
+       if (!event->attr.exclude_kernel)
+               b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+       /*
+        * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+        */
+
+       return m == b;
+}
+
+int x86_pmu_hw_config(struct perf_event *event)
+{
+       if (event->attr.precise_ip) {
+               int precise = 0;
+
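+               /*
+                * precise ends up as the highest precise_ip level we can
+                * support: 1 = constant skid (PEBS), 2 = IP fixup (LBR or
+                * PEBS format >= 2), 3 = precise distribution events.
+                */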
+               /* Support for constant skid */
+               if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+                       precise++;
+
+                       /* Support for IP fixup */
+                       if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
+                               precise++;
+
+                       if (x86_pmu.pebs_prec_dist)
+                               precise++;
+               }
+
+               if (event->attr.precise_ip > precise)
+                       return -EOPNOTSUPP;
+       }
+       /*
+        * check that PEBS LBR correction does not conflict with
+        * whatever the user is asking with attr->branch_sample_type
+        */
+       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
+               u64 *br_type = &event->attr.branch_sample_type;
+
+               if (has_branch_stack(event)) {
+                       if (!precise_br_compat(event))
+                               return -EOPNOTSUPP;
+
+                       /* branch_sample_type is compatible */
+
+               } else {
+                       /*
+                        * The user did not specify branch_sample_type.
+                        *
+                        * For PEBS fixups we capture all branches at the
+                        * privilege level of the event.
+                        */
+                       *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+                       if (!event->attr.exclude_user)
+                               *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+                       if (!event->attr.exclude_kernel)
+                               *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+               }
+       }
+
+       if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+               event->attach_state |= PERF_ATTACH_TASK_DATA;
+
+       /*
+        * Generate PMC IRQs:
+        * (keep 'enabled' bit clear for now)
+        */
+       event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+       /*
+        * Count user and OS events unless requested not to
+        */
+       if (!event->attr.exclude_user)
+               event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+       if (!event->attr.exclude_kernel)
+               event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+       if (event->attr.type == PERF_TYPE_RAW)
+               event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+       if (event->attr.sample_period && x86_pmu.limit_period) {
+               if (x86_pmu.limit_period(event, event->attr.sample_period) >
+                               event->attr.sample_period)
+                       return -EINVAL;
+       }
+
+       return x86_setup_perfctr(event);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __x86_pmu_event_init(struct perf_event *event)
+{
+       int err;
+
+       if (!x86_pmu_initialized())
+               return -ENODEV;
+
+       err = x86_reserve_hardware();
+       if (err)
+               return err;
+
+       atomic_inc(&active_events);
+       event->destroy = hw_perf_event_destroy;
+
+       event->hw.idx = -1;
+       event->hw.last_cpu = -1;
+       event->hw.last_tag = ~0ULL;
+
+       /* mark unused */
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+       return x86_pmu.hw_config(event);
+}
+
+void x86_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               u64 val;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               rdmsrl(x86_pmu_config_addr(idx), val);
+               if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
+                       continue;
+               val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+               wrmsrl(x86_pmu_config_addr(idx), val);
+       }
+}
+
+/*
+ * A PMI may still land after enabled has been set to 0; it can hit either
+ * before or after disable_all().
+ *
+ * If the PMI hits before disable_all(), the PMU is disabled in the NMI
+ * handler and is not re-enabled there, because enabled == 0.  The
+ * subsequent disable_all() call does not change the state either.  If the
+ * PMI hits after disable_all(), the PMU is already disabled before the NMI
+ * handler is entered, and the handler does not change the state either.
+ *
+ * So either situation is harmless.
+ */
+static void x86_pmu_disable(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (!x86_pmu_initialized())
+               return;
+
+       if (!cpuc->enabled)
+               return;
+
+       cpuc->n_added = 0;
+       cpuc->enabled = 0;
+       barrier();
+
+       x86_pmu.disable_all();
+}
+
+void x86_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       }
+}
+
+static struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+       return event->pmu == &pmu;
+}
+
+/*
+ * Event scheduler state:
+ *
+ * Assign events by iterating over all events and counters, beginning
+ * with the events with the smallest weights.  Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+       int     weight;
+       int     event;          /* event index */
+       int     counter;        /* counter index */
+       int     unassigned;     /* number of events to be assigned left */
+       int     nr_gp;          /* number of GP counters used */
+       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define        SCHED_STATES_MAX        2
+
+struct perf_sched {
+       int                     max_weight;
+       int                     max_events;
+       int                     max_gp;
+       int                     saved_states;
+       struct event_constraint **constraints;
+       struct sched_state      state;
+       struct sched_state      saved[SCHED_STATES_MAX];
+};
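+
+/*
+ * The scheduler below does a greedy assignment with limited backtracking:
+ * events are visited in order of increasing constraint weight and each
+ * grabs the first free counter allowed by its constraint mask.  For
+ * overlapping constraints the state is saved, so a failed assignment can
+ * be retried with a different counter choice.
+ */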
+
+/*
+ * Initialize the iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+                           int num, int wmin, int wmax, int gpmax)
+{
+       int idx;
+
+       memset(sched, 0, sizeof(*sched));
+       sched->max_events       = num;
+       sched->max_weight       = wmax;
+       sched->max_gp           = gpmax;
+       sched->constraints      = constraints;
+
+       for (idx = 0; idx < num; idx++) {
+               if (constraints[idx]->weight == wmin)
+                       break;
+       }
+
+       sched->state.event      = idx;          /* start with min weight */
+       sched->state.weight     = wmin;
+       sched->state.unassigned = num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+       if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+               return;
+
+       sched->saved[sched->saved_states] = sched->state;
+       sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+       if (!sched->saved_states)
+               return false;
+
+       sched->saved_states--;
+       sched->state = sched->saved[sched->saved_states];
+
+       /* continue with next counter: */
+       clear_bit(sched->state.counter++, sched->state.used);
+
+       return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+       int idx;
+
+       if (!sched->state.unassigned)
+               return false;
+
+       if (sched->state.event >= sched->max_events)
+               return false;
+
+       c = sched->constraints[sched->state.event];
+       /* Prefer fixed purpose counters */
+       if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
+               idx = INTEL_PMC_IDX_FIXED;
+               for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+                       if (!__test_and_set_bit(idx, sched->state.used))
+                               goto done;
+               }
+       }
+
+       /* Grab the first unused counter starting with idx */
+       idx = sched->state.counter;
+       for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
+               if (!__test_and_set_bit(idx, sched->state.used)) {
+                       if (sched->state.nr_gp++ >= sched->max_gp)
+                               return false;
+
+                       goto done;
+               }
+       }
+
+       return false;
+
+done:
+       sched->state.counter = idx;
+
+       if (c->overlap)
+               perf_sched_save_state(sched);
+
+       return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+       while (!__perf_sched_find_counter(sched)) {
+               if (!perf_sched_restore_state(sched))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+
+       if (!sched->state.unassigned || !--sched->state.unassigned)
+               return false;
+
+       do {
+               /* next event */
+               sched->state.event++;
+               if (sched->state.event >= sched->max_events) {
+                       /* next weight */
+                       sched->state.event = 0;
+                       sched->state.weight++;
+                       if (sched->state.weight > sched->max_weight)
+                               return false;
+               }
+               c = sched->constraints[sched->state.event];
+       } while (c->weight != sched->state.weight);
+
+       sched->state.counter = 0;       /* start with first counter */
+
+       return true;
+}
+
+/*
+ * Assign a counter for each event; returns the number of events that
+ * could not be assigned (0 on success).
+ */
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign)
+{
+       struct perf_sched sched;
+
+       perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
+
+       do {
+               if (!perf_sched_find_counter(&sched))
+                       break;  /* failed */
+               if (assign)
+                       assign[sched.state.event] = sched.state.counter;
+       } while (perf_sched_next_event(&sched));
+
+       return sched.state.unassigned;
+}
+EXPORT_SYMBOL_GPL(perf_assign_events);
+
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+       struct event_constraint *c;
+       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       struct perf_event *e;
+       int i, wmin, wmax, unsched = 0;
+       struct hw_perf_event *hwc;
+
+       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+       if (x86_pmu.start_scheduling)
+               x86_pmu.start_scheduling(cpuc);
+
+       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+               cpuc->event_constraint[i] = NULL;
+               c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+               cpuc->event_constraint[i] = c;
+
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
+       }
+
+       /*
+        * fastpath, try to reuse previous register
+        */
+       for (i = 0; i < n; i++) {
+               hwc = &cpuc->event_list[i]->hw;
+               c = cpuc->event_constraint[i];
+
+               /* never assigned */
+               if (hwc->idx == -1)
+                       break;
+
+               /* constraint still honored */
+               if (!test_bit(hwc->idx, c->idxmsk))
+                       break;
+
+               /* not already used */
+               if (test_bit(hwc->idx, used_mask))
+                       break;
+
+               __set_bit(hwc->idx, used_mask);
+               if (assign)
+                       assign[i] = hwc->idx;
+       }
+
+       /* slow path */
+       if (i != n) {
+               int gpmax = x86_pmu.num_counters;
+
+               /*
+                * Do not allow scheduling of more than half the available
+                * generic counters.
+                *
+                * This helps avoid counter starvation of the sibling thread
+                * by ensuring at most half the counters cannot be in
+                * exclusive mode.  There are no designated counters for the
+                * limit; any N/2 counters can be used.  This helps with
+                * events with specific counter constraints.
+                */
+               if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+                   READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+                       gpmax /= 2;
+
+               unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
+                                            wmax, gpmax, assign);
+       }
+
+       /*
+        * In case of success (unsched = 0), mark events as committed,
+        * so we do not put_constraint() in case new events are added
+        * and fail to be scheduled
+        *
+        * We invoke the lower level commit callback to lock the resource
+        *
+        * We do not need to do all of this in case we are called to
+        * validate an event group (assign == NULL)
+        */
+       if (!unsched && assign) {
+               for (i = 0; i < n; i++) {
+                       e = cpuc->event_list[i];
+                       e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+                       if (x86_pmu.commit_scheduling)
+                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+               }
+       } else {
+               for (i = 0; i < n; i++) {
+                       e = cpuc->event_list[i];
+                       /*
+                        * do not put_constraint() on committed events,
+                        * because they are good to go
+                        */
+                       if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+                               continue;
+
+                       /*
+                        * release events that failed scheduling
+                        */
+                       if (x86_pmu.put_event_constraints)
+                               x86_pmu.put_event_constraints(cpuc, e);
+               }
+       }
+
+       if (x86_pmu.stop_scheduling)
+               x86_pmu.stop_scheduling(cpuc);
+
+       return unsched ? -EINVAL : 0;
+}
+
+/*
+ * dogrp: true if sibling events (the whole group) must be collected too.
+ * Returns the total number of collected events or a negative error code.
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+       struct perf_event *event;
+       int n, max_count;
+
+       max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
+
+       /* current number of events already accepted */
+       n = cpuc->n_events;
+
+       if (is_x86_event(leader)) {
+               if (n >= max_count)
+                       return -EINVAL;
+               cpuc->event_list[n] = leader;
+               n++;
+       }
+       if (!dogrp)
+               return n;
+
+       list_for_each_entry(event, &leader->sibling_list, group_entry) {
+               if (!is_x86_event(event) ||
+                   event->state <= PERF_EVENT_STATE_OFF)
+                       continue;
+
+               if (n >= max_count)
+                       return -EINVAL;
+
+               cpuc->event_list[n] = event;
+               n++;
+       }
+       return n;
+}
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+                               struct cpu_hw_events *cpuc, int i)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       hwc->idx = cpuc->assign[i];
+       hwc->last_cpu = smp_processor_id();
+       hwc->last_tag = ++cpuc->tags[i];
+
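+       /*
+        * Map the assigned index onto its MSRs: the BTS pseudo counter has
+        * no counter MSRs, fixed counters share the fixed control MSR and
+        * are addressed through the fixed rdpmc range (bit 30 set in the
+        * rdpmc index), and generic counters use their per-counter
+        * config/event MSRs.
+        */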
+       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+               hwc->config_base = 0;
+               hwc->event_base = 0;
+       } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+               hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
+               hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+       } else {
+               hwc->config_base = x86_pmu_config_addr(hwc->idx);
+               hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+               hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+       }
+}
+
+static inline int match_prev_assignment(struct hw_perf_event *hwc,
+                                       struct cpu_hw_events *cpuc,
+                                       int i)
+{
+       return hwc->idx == cpuc->assign[i] &&
+               hwc->last_cpu == smp_processor_id() &&
+               hwc->last_tag == cpuc->tags[i];
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags);
+
+static void x86_pmu_enable(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_event *event;
+       struct hw_perf_event *hwc;
+       int i, added = cpuc->n_added;
+
+       if (!x86_pmu_initialized())
+               return;
+
+       if (cpuc->enabled)
+               return;
+
+       if (cpuc->n_added) {
+               int n_running = cpuc->n_events - cpuc->n_added;
+               /*
+                * apply assignment obtained either from
+                * hw_perf_group_sched_in() or x86_pmu_enable()
+                *
+                * step1: save events moving to new counters
+                */
+               for (i = 0; i < n_running; i++) {
+                       event = cpuc->event_list[i];
+                       hwc = &event->hw;
+
+                       /*
+                        * we can avoid reprogramming counter if:
+                        * - assigned same counter as last time
+                        * - running on same CPU as last time
+                        * - no other event has used the counter since
+                        */
+                       if (hwc->idx == -1 ||
+                           match_prev_assignment(hwc, cpuc, i))
+                               continue;
+
+                       /*
+                        * Ensure we don't accidentally enable a stopped
+                        * counter simply because we rescheduled.
+                        */
+                       if (hwc->state & PERF_HES_STOPPED)
+                               hwc->state |= PERF_HES_ARCH;
+
+                       x86_pmu_stop(event, PERF_EF_UPDATE);
+               }
+
+               /*
+                * step2: reprogram moved events into new counters
+                */
+               for (i = 0; i < cpuc->n_events; i++) {
+                       event = cpuc->event_list[i];
+                       hwc = &event->hw;
+
+                       if (!match_prev_assignment(hwc, cpuc, i))
+                               x86_assign_hw_event(event, cpuc, i);
+                       else if (i < n_running)
+                               continue;
+
+                       if (hwc->state & PERF_HES_ARCH)
+                               continue;
+
+                       x86_pmu_start(event, PERF_EF_RELOAD);
+               }
+               cpuc->n_added = 0;
+               perf_events_lapic_init();
+       }
+
+       cpuc->enabled = 1;
+       barrier();
+
+       x86_pmu.enable_all(added);
+}
+
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int x86_perf_event_set_period(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int ret = 0, idx = hwc->idx;
+
+       if (idx == INTEL_PMC_IDX_FIXED_BTS)
+               return 0;
+
+       /*
+        * If we are way outside a reasonable range then just skip forward:
+        */
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (unlikely(left <= 0)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+       /*
+        * Quirk: certain CPUs don't like it if just 1 hw_event is left:
+        */
+       if (unlikely(left < 2))
+               left = 2;
+
+       if (left > x86_pmu.max_period)
+               left = x86_pmu.max_period;
+
+       if (x86_pmu.limit_period)
+               left = x86_pmu.limit_period(event, left);
+
+       per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+
+       if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+           local64_read(&hwc->prev_count) != (u64)-left) {
+               /*
+                * The hw event starts counting from this event offset;
+                * mark it so we can extract future deltas:
+                */
+               local64_set(&hwc->prev_count, (u64)-left);
+
+               wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+       }
+
+       /*
+        * Due to an erratum on certain CPUs, we need a second write to
+        * make sure the register is updated properly.
+        */
+       if (x86_pmu.perfctr_second_write) {
+               wrmsrl(hwc->event_base,
+                       (u64)(-left) & x86_pmu.cntval_mask);
+       }
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
+
+void x86_pmu_enable_event(struct perf_event *event)
+{
+       if (__this_cpu_read(cpu_hw_events.enabled))
+               __x86_pmu_enable_event(&event->hw,
+                                      ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Add a single event to the PMU.
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scheduled with existing events.
+ */
+static int x86_pmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc;
+       int assign[X86_PMC_IDX_MAX];
+       int n, n0, ret;
+
+       hwc = &event->hw;
+
+       n0 = cpuc->n_events;
+       ret = n = collect_events(cpuc, event, false);
+       if (ret < 0)
+               goto out;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       /*
+        * If group events scheduling transaction was started,
+        * skip the schedulability test here, it will be performed
+        * at commit time (->commit_txn) as a whole.
+        */
+       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+               goto done_collect;
+
+       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       if (ret)
+               goto out;
+       /*
+        * Copy the new assignment; now that we know it is possible it
+        * will be used by hw_perf_enable().
+        */
+       memcpy(cpuc->assign, assign, n*sizeof(int));
+
+done_collect:
+       /*
+        * Commit the collect_events() state. See x86_pmu_del() and
+        * x86_pmu_*_txn().
+        */
+       cpuc->n_events = n;
+       cpuc->n_added += n - n0;
+       cpuc->n_txn += n - n0;
+
+       ret = 0;
+out:
+       return ret;
+}
+
+static void x86_pmu_start(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx = event->hw.idx;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       if (WARN_ON_ONCE(idx == -1))
+               return;
+
+       if (flags & PERF_EF_RELOAD) {
+               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+               x86_perf_event_set_period(event);
+       }
+
+       event->hw.state = 0;
+
+       cpuc->events[idx] = event;
+       __set_bit(idx, cpuc->active_mask);
+       __set_bit(idx, cpuc->running);
+       x86_pmu.enable(event);
+       perf_event_update_userpage(event);
+}
+
+void perf_event_print_debug(void)
+{
+       u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+       u64 pebs, debugctl;
+       struct cpu_hw_events *cpuc;
+       unsigned long flags;
+       int cpu, idx;
+
+       if (!x86_pmu.num_counters)
+               return;
+
+       local_irq_save(flags);
+
+       cpu = smp_processor_id();
+       cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       if (x86_pmu.version >= 2) {
+               rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+               rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+               rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+
+               pr_info("\n");
+               pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
+               pr_info("CPU#%d: status:     %016llx\n", cpu, status);
+               pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
+               pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+               if (x86_pmu.pebs_constraints) {
+                       rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+                       pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
+               }
+               if (x86_pmu.lbr_nr) {
+                       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+                       pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
+               }
+       }
+       pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+               rdmsrl(x86_pmu_event_addr(idx), pmc_count);
+
+               prev_left = per_cpu(pmc_prev_left[idx], cpu);
+
+               pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
+                       cpu, idx, pmc_ctrl);
+               pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
+                       cpu, idx, pmc_count);
+               pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
+                       cpu, idx, prev_left);
+       }
+       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
+
+               pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
+                       cpu, idx, pmc_count);
+       }
+       local_irq_restore(flags);
+}
+
+void x86_pmu_stop(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+               x86_pmu.disable(event);
+               cpuc->events[hwc->idx] = NULL;
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               x86_perf_event_update(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static void x86_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int i;
+
+       /*
+        * event is descheduled
+        */
+       event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+       /*
+        * If we're called during a txn, we don't need to do anything.
+        * The events never got scheduled and ->cancel_txn will truncate
+        * the event_list.
+        *
+        * XXX assumes any ->del() called during a TXN will only be on
+        * an event added during that same TXN.
+        */
+       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+               return;
+
+       /*
+        * Not a TXN, therefore cleanup properly.
+        */
+       x86_pmu_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < cpuc->n_events; i++) {
+               if (event == cpuc->event_list[i])
+                       break;
+       }
+
+       if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
+               return;
+
+       /* If we have a newly added event; make sure to decrease n_added. */
+       if (i >= cpuc->n_events - cpuc->n_added)
+               --cpuc->n_added;
+
+       if (x86_pmu.put_event_constraints)
+               x86_pmu.put_event_constraints(cpuc, event);
+
+       /* Delete the array entry. */
+       while (++i < cpuc->n_events) {
+               cpuc->event_list[i-1] = cpuc->event_list[i];
+               cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+       }
+       --cpuc->n_events;
+
+       perf_event_update_userpage(event);
+}
+
+int x86_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       struct perf_event *event;
+       int idx, handled = 0;
+       u64 val;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * Some chipsets need to unmask the LVTPC in a particular spot
+        * inside the nmi handler.  As a result, the unmasking was pushed
+        * into all the nmi handlers.
+        *
+        * This generic handler doesn't seem to be sensitive to where the
+        * unmasking occurs, so it was left at the top.
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /*
+                        * Although we deactivated the counter, some CPUs
+                        * might still deliver spurious interrupts that are
+                        * still in flight.  Catch them:
+                        */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
+                       continue;
+               }
+
+               event = cpuc->events[idx];
+
+               val = x86_perf_event_update(event);
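+               /*
+                * Counters count up from -sample_period towards overflow,
+                * so as long as the top bit of the counter value is still
+                * set the counter has not overflowed yet.
+                */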
+               if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+                       continue;
+
+               /*
+                * event overflow
+                */
+               handled++;
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (!x86_perf_event_set_period(event))
+                       continue;
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       return handled;
+}
+
+void perf_events_lapic_init(void)
+{
+       if (!x86_pmu.apic || !x86_pmu_initialized())
+               return;
+
+       /*
+        * Always use NMI for PMU
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+
+static int
+perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+       u64 start_clock;
+       u64 finish_clock;
+       int ret;
+
+       /*
+        * All PMUs/events that share this PMI handler should make sure to
+        * increment active_events for their events.
+        */
+       if (!atomic_read(&active_events))
+               return NMI_DONE;
+
+       start_clock = sched_clock();
+       ret = x86_pmu.handle_irq(regs);
+       finish_clock = sched_clock();
+
+       perf_sample_event_took(finish_clock - start_clock);
+
+       return ret;
+}
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
+
+struct event_constraint emptyconstraint;
+struct event_constraint unconstrained;
+
+static int
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int i, ret = NOTIFY_OK;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+                       cpuc->kfree_on_online[i] = NULL;
+               if (x86_pmu.cpu_prepare)
+                       ret = x86_pmu.cpu_prepare(cpu);
+               break;
+
+       case CPU_STARTING:
+               if (x86_pmu.cpu_starting)
+                       x86_pmu.cpu_starting(cpu);
+               break;
+
+       case CPU_ONLINE:
+               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+                       kfree(cpuc->kfree_on_online[i]);
+                       cpuc->kfree_on_online[i] = NULL;
+               }
+               break;
+
+       case CPU_DYING:
+               if (x86_pmu.cpu_dying)
+                       x86_pmu.cpu_dying(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DEAD:
+               if (x86_pmu.cpu_dead)
+                       x86_pmu.cpu_dead(cpu);
+               break;
+
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+static void __init pmu_check_apic(void)
+{
+       if (cpu_has_apic)
+               return;
+
+       x86_pmu.apic = 0;
+       pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+       pr_info("no hardware sampling interrupt available.\n");
+
+       /*
+        * If we have a PMU initialized but no APIC
+        * interrupts, we cannot sample hardware
+        * events (user-space has to fall back and
+        * sample via an hrtimer-based software event):
+        */
+       pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+}
+
+static struct attribute_group x86_pmu_format_group = {
+       .name = "format",
+       .attrs = NULL,
+};
+
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static void __init filter_events(struct attribute **attrs)
+{
+       struct device_attribute *d;
+       struct perf_pmu_events_attr *pmu_attr;
+       int offset = 0;
+       int i, j;
+
+       for (i = 0; attrs[i]; i++) {
+               d = (struct device_attribute *)attrs[i];
+               pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+               /* str trumps id */
+               if (pmu_attr->event_str)
+                       continue;
+               if (x86_pmu.event_map(i + offset))
+                       continue;
+
+               for (j = i; attrs[j]; j++)
+                       attrs[j] = attrs[j + 1];
+
+               /* Check the shifted attr. */
+               i--;
+
+               /*
+                * event_map() is index based; the attrs array is organized
+                * by increasing event index.  If we shift the events, we
+                * need to compensate in the event_map() lookup, otherwise
+                * we would look up the wrong event in the map.
+                */
+               offset++;
+       }
+}
+
+/* Merge two pointer arrays */
+__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+       struct attribute **new;
+       int j, i;
+
+       for (j = 0; a[j]; j++)
+               ;
+       for (i = 0; b[i]; i++)
+               j++;
+       j++;
+
+       new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+       j = 0;
+       for (i = 0; a[i]; i++)
+               new[j++] = a[i];
+       for (i = 0; b[i]; i++)
+               new[j++] = b[i];
+       new[j] = NULL;
+
+       return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page)
+{
+       struct perf_pmu_events_attr *pmu_attr = \
+               container_of(attr, struct perf_pmu_events_attr, attr);
+       u64 config = x86_pmu.event_map(pmu_attr->id);
+
+       /* string trumps id */
+       if (pmu_attr->event_str)
+               return sprintf(page, "%s", pmu_attr->event_str);
+
+       return x86_pmu.events_sysfs_show(page, config);
+}
+
+EVENT_ATTR(cpu-cycles,                 CPU_CYCLES              );
+EVENT_ATTR(instructions,               INSTRUCTIONS            );
+EVENT_ATTR(cache-references,           CACHE_REFERENCES        );
+EVENT_ATTR(cache-misses,               CACHE_MISSES            );
+EVENT_ATTR(branch-instructions,                BRANCH_INSTRUCTIONS     );
+EVENT_ATTR(branch-misses,              BRANCH_MISSES           );
+EVENT_ATTR(bus-cycles,                 BUS_CYCLES              );
+EVENT_ATTR(stalled-cycles-frontend,    STALLED_CYCLES_FRONTEND );
+EVENT_ATTR(stalled-cycles-backend,     STALLED_CYCLES_BACKEND  );
+EVENT_ATTR(ref-cycles,                 REF_CPU_CYCLES          );
+
+static struct attribute *empty_attrs;
+
+static struct attribute *events_attr[] = {
+       EVENT_PTR(CPU_CYCLES),
+       EVENT_PTR(INSTRUCTIONS),
+       EVENT_PTR(CACHE_REFERENCES),
+       EVENT_PTR(CACHE_MISSES),
+       EVENT_PTR(BRANCH_INSTRUCTIONS),
+       EVENT_PTR(BRANCH_MISSES),
+       EVENT_PTR(BUS_CYCLES),
+       EVENT_PTR(STALLED_CYCLES_FRONTEND),
+       EVENT_PTR(STALLED_CYCLES_BACKEND),
+       EVENT_PTR(REF_CPU_CYCLES),
+       NULL,
+};
+
+static struct attribute_group x86_pmu_events_group = {
+       .name = "events",
+       .attrs = events_attr,
+};
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
+{
+       u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+       u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
+       bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
+       bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
+       bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
+       bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
+       ssize_t ret;
+
+       /*
+        * We have a whole page to spend and only a little data to write,
+        * so we can safely use sprintf().
+        */
+       ret = sprintf(page, "event=0x%02llx", event);
+
+       if (umask)
+               ret += sprintf(page + ret, ",umask=0x%02llx", umask);
+
+       if (edge)
+               ret += sprintf(page + ret, ",edge");
+
+       if (pc)
+               ret += sprintf(page + ret, ",pc");
+
+       if (any)
+               ret += sprintf(page + ret, ",any");
+
+       if (inv)
+               ret += sprintf(page + ret, ",inv");
+
+       if (cmask)
+               ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
+
+       ret += sprintf(page + ret, "\n");
+
+       return ret;
+}
+
+static int __init init_hw_perf_events(void)
+{
+       struct x86_pmu_quirk *quirk;
+       int err;
+
+       pr_info("Performance Events: ");
+
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_INTEL:
+               err = intel_pmu_init();
+               break;
+       case X86_VENDOR_AMD:
+               err = amd_pmu_init();
+               break;
+       default:
+               err = -ENOTSUPP;
+       }
+       if (err != 0) {
+               pr_cont("no PMU driver, software events only.\n");
+               return 0;
+       }
+
+       pmu_check_apic();
+
+       /* sanity check that the hardware exists or is emulated */
+       if (!check_hw_exists())
+               return 0;
+
+       pr_cont("%s PMU driver.\n", x86_pmu.name);
+
+       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
+       for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+               quirk->func();
+
+       if (!x86_pmu.intel_ctrl)
+               x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+       perf_events_lapic_init();
+       register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
+
+       unconstrained = (struct event_constraint)
+               __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+                                  0, x86_pmu.num_counters, 0, 0);
+
+       x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+
+       if (x86_pmu.event_attrs)
+               x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
+       if (!x86_pmu.events_sysfs_show)
+               x86_pmu_events_group.attrs = &empty_attrs;
+       else
+               filter_events(x86_pmu_events_group.attrs);
+
+       if (x86_pmu.cpu_events) {
+               struct attribute **tmp;
+
+               tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+               if (!WARN_ON(!tmp))
+                       x86_pmu_events_group.attrs = tmp;
+       }
+
+       pr_info("... version:                %d\n",     x86_pmu.version);
+       pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+       pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+       pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
+       pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
+       pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
+       pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
+
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+       perf_cpu_notifier(x86_pmu_notifier);
+
+       return 0;
+}
+early_initcall(init_hw_perf_events);
+
+static inline void x86_pmu_read(struct perf_event *event)
+{
+       x86_perf_event_update(event);
+}
+
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
+ */
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
+
+       cpuc->txn_flags = txn_flags;
+       if (txn_flags & ~PERF_PMU_TXN_ADD)
+               return;
+
+       perf_pmu_disable(pmu);
+       __this_cpu_write(cpu_hw_events.n_txn, 0);
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void x86_pmu_cancel_txn(struct pmu *pmu)
+{
+       unsigned int txn_flags;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+       txn_flags = cpuc->txn_flags;
+       cpuc->txn_flags = 0;
+       if (txn_flags & ~PERF_PMU_TXN_ADD)
+               return;
+
+       /*
+        * Truncate collected array by the number of events added in this
+        * transaction. See x86_pmu_add() and x86_pmu_*_txn().
+        */
+       __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
+       __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
+       perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ *
+ * Does not cancel the transaction on failure; expects the caller to do this.
+ */
+static int x86_pmu_commit_txn(struct pmu *pmu)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int assign[X86_PMC_IDX_MAX];
+       int n, ret;
+
+       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+       if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+               cpuc->txn_flags = 0;
+               return 0;
+       }
+
+       n = cpuc->n_events;
+
+       if (!x86_pmu_initialized())
+               return -EAGAIN;
+
+       ret = x86_pmu.schedule_events(cpuc, n, assign);
+       if (ret)
+               return ret;
+
+       /*
+        * Copy the new assignment; now that we know it is possible it
+        * will be used by hw_perf_enable().
+        */
+       memcpy(cpuc->assign, assign, n*sizeof(int));
+
+       cpuc->txn_flags = 0;
+       perf_pmu_enable(pmu);
+       return 0;
+}
+
+/*
+ * a fake_cpuc is used to validate event groups. Due to
+ * the extra reg logic, we need to also allocate a fake
+ * per_core and per_cpu structure. Otherwise, group events
+ * using extra reg may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+       kfree(cpuc->shared_regs);
+       kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+       struct cpu_hw_events *cpuc;
+       int cpu = raw_smp_processor_id();
+
+       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+       if (!cpuc)
+               return ERR_PTR(-ENOMEM);
+
+       /* only needed, if we have extra_regs */
+       if (x86_pmu.extra_regs) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto error;
+       }
+       cpuc->is_fake = 1;
+       return cpuc;
+error:
+       free_fake_cpuc(cpuc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+       struct cpu_hw_events *fake_cpuc;
+       struct event_constraint *c;
+       int ret = 0;
+
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
+
+       c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+
+       if (!c || !c->weight)
+               ret = -EINVAL;
+
+       if (x86_pmu.put_event_constraints)
+               x86_pmu.put_event_constraints(fake_cpuc, event);
+
+       free_fake_cpuc(fake_cpuc);
+
+       return ret;
+}
+
+/*
+ * validate a single event group
+ *
+ * validation includes:
+ *     - checking that the events are compatible with each other
+ *     - the events do not compete for the same counter
+ *     - the number of events does not exceed the number of counters
+ *
+ * validation ensures the group could be loaded onto the
+ * PMU if it were the only group available.
+ */
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *leader = event->group_leader;
+       struct cpu_hw_events *fake_cpuc;
+       int ret = -EINVAL, n;
+
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
+       /*
+        * The event is not yet connected with its siblings,
+        * therefore we must first collect the existing siblings
+        * and then add the new event before we can simulate
+        * the scheduling.
+        */
+       n = collect_events(fake_cpuc, leader, true);
+       if (n < 0)
+               goto out;
+
+       fake_cpuc->n_events = n;
+       n = collect_events(fake_cpuc, event, false);
+       if (n < 0)
+               goto out;
+
+       fake_cpuc->n_events = n;
+
+       ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+
+out:
+       free_fake_cpuc(fake_cpuc);
+       return ret;
+}
+
+static int x86_pmu_event_init(struct perf_event *event)
+{
+       struct pmu *tmp;
+       int err;
+
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       err = __x86_pmu_event_init(event);
+       if (!err) {
+               /*
+                * we temporarily connect the event to its pmu
+                * such that validate_group() can classify
+                * it as an x86 event using is_x86_event()
+                */
+               tmp = event->pmu;
+               event->pmu = &pmu;
+
+               if (event->group_leader != event)
+                       err = validate_group(event);
+               else
+                       err = validate_event(event);
+
+               event->pmu = tmp;
+       }
+       if (err) {
+               if (event->destroy)
+                       event->destroy(event);
+       }
+
+       if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+               event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
+       return err;
+}
+
+static void refresh_pce(void *ignored)
+{
+       if (current->mm)
+               load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return;
+
+       if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+       if (!current->mm)
+               return;
+
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return;
+
+       if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+       int idx = event->hw.idx;
+
+       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+               return 0;
+
+       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+               idx -= INTEL_PMC_IDX_FIXED;
+               idx |= 1 << 30;
+       }
+
+       return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       unsigned long val;
+       ssize_t ret;
+
+       ret = kstrtoul(buf, 0, &val);
+       if (ret)
+               return ret;
+
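+       /*
+        * 0 disables rdpmc, 1 (the default) allows rdpmc only for tasks
+        * with a mapped perf event, 2 makes rdpmc always available, e.g.
+        * (assuming the PMU is registered as "cpu" in sysfs):
+        *
+        *      echo 2 > /sys/bus/event_source/devices/cpu/rdpmc
+        */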
+       if (val > 2)
+               return -EINVAL;
+
+       if (x86_pmu.attr_rdpmc_broken)
+               return -ENOTSUPP;
+
+       if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+               /*
+                * Changing into or out of always available, aka
+                * perf-event-bypassing mode.  This path is extremely slow,
+                * but only root can trigger it, so it's okay.
+                */
+               if (val == 2)
+                       static_key_slow_inc(&rdpmc_always_available);
+               else
+                       static_key_slow_dec(&rdpmc_always_available);
+               on_each_cpu(refresh_pce, NULL, 1);
+       }
+
+       x86_pmu.attr_rdpmc = val;
+
+       return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+       &dev_attr_rdpmc.attr,
+       NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+       .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+       &x86_pmu_attr_group,
+       &x86_pmu_format_group,
+       &x86_pmu_events_group,
+       NULL,
+};
+
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (x86_pmu.sched_task)
+               x86_pmu.sched_task(ctx, sched_in);
+}
+
+void perf_check_microcode(void)
+{
+       if (x86_pmu.check_microcode)
+               x86_pmu.check_microcode();
+}
+EXPORT_SYMBOL_GPL(perf_check_microcode);
+
+static struct pmu pmu = {
+       .pmu_enable             = x86_pmu_enable,
+       .pmu_disable            = x86_pmu_disable,
+
+       .attr_groups            = x86_pmu_attr_groups,
+
+       .event_init             = x86_pmu_event_init,
+
+       .event_mapped           = x86_pmu_event_mapped,
+       .event_unmapped         = x86_pmu_event_unmapped,
+
+       .add                    = x86_pmu_add,
+       .del                    = x86_pmu_del,
+       .start                  = x86_pmu_start,
+       .stop                   = x86_pmu_stop,
+       .read                   = x86_pmu_read,
+
+       .start_txn              = x86_pmu_start_txn,
+       .cancel_txn             = x86_pmu_cancel_txn,
+       .commit_txn             = x86_pmu_commit_txn,
+
+       .event_idx              = x86_pmu_event_idx,
+       .sched_task             = x86_pmu_sched_task,
+       .task_ctx_size          = sizeof(struct x86_perf_task_context),
+};
+
+void arch_perf_update_userpage(struct perf_event *event,
+                              struct perf_event_mmap_page *userpg, u64 now)
+{
+       struct cyc2ns_data *data;
+
+       userpg->cap_user_time = 0;
+       userpg->cap_user_time_zero = 0;
+       userpg->cap_user_rdpmc =
+               !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
+       userpg->pmc_width = x86_pmu.cntval_bits;
+
+       if (!sched_clock_stable())
+               return;
+
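+       /*
+        * User space is expected to convert TSC cycles to nanoseconds
+        * roughly as time = time_offset + (cyc * time_mult) >> time_shift,
+        * using the cyc2ns parameters published below.
+        */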
+       data = cyc2ns_read_begin();
+
+       /*
+        * Internal timekeeping for enabled/running/stopped times
+        * is always in the local_clock domain.
+        */
+       userpg->cap_user_time = 1;
+       userpg->time_mult = data->cyc2ns_mul;
+       userpg->time_shift = data->cyc2ns_shift;
+       userpg->time_offset = data->cyc2ns_offset - now;
+
+       /*
+        * cap_user_time_zero doesn't make sense when we're using a different
+        * time base for the records.
+        */
+       if (event->clock == &local_clock) {
+               userpg->cap_user_time_zero = 1;
+               userpg->time_zero = data->cyc2ns_offset;
+       }
+
+       cyc2ns_read_end(data);
+}
+
+/*
+ * callchain support
+ */
+
+static int backtrace_stack(void *data, char *name)
+{
+       return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr, int reliable)
+{
+       struct perf_callchain_entry *entry = data;
+
+       perf_callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops backtrace_ops = {
+       .stack                  = backtrace_stack,
+       .address                = backtrace_address,
+       .walk_stack             = print_context_stack_bp,
+};
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ip);
+
+       dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+}
+
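+/* A user frame is only walked if it lies entirely below TASK_SIZE. */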
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+       return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
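+/* Look up the base address of @segment in the current LDT or the GDT. */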
+static unsigned long get_segment_base(unsigned int segment)
+{
+       struct desc_struct *desc;
+       int idx = segment >> 3;
+
+       if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+               struct ldt_struct *ldt;
+
+               if (idx >= LDT_ENTRIES)
+                       return 0;
+
+               /* IRQs are off, so this synchronizes with smp_store_release */
+               ldt = lockless_dereference(current->active_mm->context.ldt);
+               if (!ldt || idx >= ldt->size)
+                       return 0;
+
+               desc = &ldt->entries[idx];
+#else
+               return 0;
+#endif
+       } else {
+               if (idx >= GDT_ENTRIES)
+                       return 0;
+
+               desc = raw_cpu_ptr(gdt_page.gdt) + idx;
+       }
+
+       return get_desc_base(desc);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+
+#include <asm/compat.h>
+
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+       /* 32-bit process in 64-bit kernel. */
+       unsigned long ss_base, cs_base;
+       struct stack_frame_ia32 frame;
+       const void __user *fp;
+
+       if (!test_thread_flag(TIF_IA32))
+               return 0;
+
+       cs_base = get_segment_base(regs->cs);
+       ss_base = get_segment_base(regs->ss);
+
+       fp = compat_ptr(ss_base + regs->bp);
+       pagefault_disable();
+       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+               unsigned long bytes;
+               frame.next_frame     = 0;
+               frame.return_address = 0;
+
+               if (!access_ok(VERIFY_READ, fp, 8))
+                       break;
+
+               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
+               if (bytes != 0)
+                       break;
+               bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
+               if (bytes != 0)
+                       break;
+
+               if (!valid_user_frame(fp, sizeof(frame)))
+                       break;
+
+               perf_callchain_store(entry, cs_base + frame.return_address);
+               fp = compat_ptr(ss_base + frame.next_frame);
+       }
+       pagefault_enable();
+       return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+       return 0;
+}
+#endif
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct stack_frame frame;
+       const void __user *fp;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest OS callchains yet */
+               return;
+       }
+
+       /*
+        * We don't know what to do with VM86 stacks; ignore them for now.
+        */
+       if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+               return;
+
+       fp = (void __user *)regs->bp;
+
+       perf_callchain_store(entry, regs->ip);
+
+       if (!current->mm)
+               return;
+
+       if (perf_callchain_user32(regs, entry))
+               return;
+
+       pagefault_disable();
+       while (entry->nr < PERF_MAX_STACK_DEPTH) {
+               unsigned long bytes;
+               frame.next_frame             = NULL;
+               frame.return_address = 0;
+
+               if (!access_ok(VERIFY_READ, fp, 16))
+                       break;
+
+               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
+               if (bytes != 0)
+                       break;
+               bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
+               if (bytes != 0)
+                       break;
+
+               if (!valid_user_frame(fp, sizeof(frame)))
+                       break;
+
+               perf_callchain_store(entry, frame.return_address);
+               fp = (void __user *)frame.next_frame;
+       }
+       pagefault_enable();
+}
+
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ *   VM86 - the good olde 16 bit days, where the linear address is
+ *          20 bits and we use regs->ip + 0x10 * regs->cs.
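+ *          (e.g. cs=0x1234 and ip=0x0010 give a linear address of 0x12350)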
+ *
+ *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ *          to figure out what the 32bit base address is.
+ *
+ *    X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
+{
+       /*
+        * For IA32 we look at the GDT/LDT segment base to convert the
+        * effective IP to a linear address.
+        */
+
+#ifdef CONFIG_X86_32
+       /*
+        * If we are in VM86 mode, add the segment offset to convert to a
+        * linear address.
+        */
+       if (regs->flags & X86_VM_MASK)
+               return 0x10 * regs->cs;
+
+       if (user_mode(regs) && regs->cs != __USER_CS)
+               return get_segment_base(regs->cs);
+#else
+       if (user_mode(regs) && !user_64bit_mode(regs) &&
+           regs->cs != __USER32_CS)
+               return get_segment_base(regs->cs);
+#endif
+       return 0;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+               return perf_guest_cbs->get_guest_ip();
+
+       return regs->ip + code_segment_base(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+       int misc = 0;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               if (perf_guest_cbs->is_user_mode())
+                       misc |= PERF_RECORD_MISC_GUEST_USER;
+               else
+                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+       } else {
+               if (user_mode(regs))
+                       misc |= PERF_RECORD_MISC_USER;
+               else
+                       misc |= PERF_RECORD_MISC_KERNEL;
+       }
+
+       if (regs->flags & PERF_EFLAGS_EXACT)
+               misc |= PERF_RECORD_MISC_EXACT_IP;
+
+       return misc;
+}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+       cap->version            = x86_pmu.version;
+       cap->num_counters_gp    = x86_pmu.num_counters;
+       cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+       cap->bit_width_gp       = x86_pmu.cntval_bits;
+       cap->bit_width_fixed    = x86_pmu.cntval_bits;
+       cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
+       cap->events_mask_len    = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
new file mode 100644 (file)
index 0000000..b99dc92
--- /dev/null
@@ -0,0 +1,544 @@
+/*
+ * BTS PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/coredump.h>
+
+#include <asm-generic/sizes.h>
+#include <asm/perf_event.h>
+
+#include "../perf_event.h"
+
+struct bts_ctx {
+       struct perf_output_handle       handle;
+       struct debug_store              ds_back;
+       int                             started;
+};
+
+static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
+
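+/* One BTS record is three 8-byte fields: branch-from, branch-to, flags. */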
+#define BTS_RECORD_SIZE                24
+#define BTS_SAFETY_MARGIN      4080
+
+struct bts_phys {
+       struct page     *page;
+       unsigned long   size;
+       unsigned long   offset;
+       unsigned long   displacement;
+};
+
+struct bts_buffer {
+       size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
+       unsigned int    nr_pages;
+       unsigned int    nr_bufs;
+       unsigned int    cur_buf;
+       bool            snapshot;
+       local_t         data_size;
+       local_t         lost;
+       local_t         head;
+       unsigned long   end;
+       void            **data_pages;
+       struct bts_phys buf[0];
+};
+
+struct pmu bts_pmu;
+
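+/* Size of a high-order phys buffer; page_private() holds its allocation order. */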
+static size_t buf_size(struct page *page)
+{
+       return 1 << (PAGE_SHIFT + page_private(page));
+}
+
+static void *
+bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+{
+       struct bts_buffer *buf;
+       struct page *page;
+       int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+       unsigned long offset;
+       size_t size = nr_pages << PAGE_SHIFT;
+       int pg, nbuf, pad;
+
+       /* count all the high order buffers */
+       for (pg = 0, nbuf = 0; pg < nr_pages;) {
+               page = virt_to_page(pages[pg]);
+               if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
+                       return NULL;
+               pg += 1 << page_private(page);
+               nbuf++;
+       }
+
+       /*
+        * To avoid interrupts in overwrite mode, only allow one physical
+        * buffer.
+        */
+       if (overwrite && nbuf > 1)
+               return NULL;
+
+       buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
+       if (!buf)
+               return NULL;
+
+       buf->nr_pages = nr_pages;
+       buf->nr_bufs = nbuf;
+       buf->snapshot = overwrite;
+       buf->data_pages = pages;
+       buf->real_size = size - size % BTS_RECORD_SIZE;
+
+       for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+               unsigned int __nr_pages;
+
+               page = virt_to_page(pages[pg]);
+               __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+               buf->buf[nbuf].page = page;
+               buf->buf[nbuf].offset = offset;
+               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+               buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
+               pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
+               buf->buf[nbuf].size -= pad;
+
+               pg += __nr_pages;
+               offset += __nr_pages << PAGE_SHIFT;
+       }
+
+       return buf;
+}
+
+static void bts_buffer_free_aux(void *data)
+{
+       kfree(data);
+}
+
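+/* Offset of phys buffer @idx's record-aligned data within the AUX buffer. */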
+static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+{
+       return buf->buf[idx].offset + buf->buf[idx].displacement;
+}
+
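+/*
+ * Program the DS area (buffer base, index, absolute maximum and interrupt
+ * threshold) for the phys buffer currently being written.
+ */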
+static void
+bts_config_buffer(struct bts_buffer *buf)
+{
+       int cpu = raw_smp_processor_id();
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct bts_phys *phys = &buf->buf[buf->cur_buf];
+       unsigned long index, thresh = 0, end = phys->size;
+       struct page *page = phys->page;
+
+       index = local_read(&buf->head);
+
+       if (!buf->snapshot) {
+               if (buf->end < phys->offset + buf_size(page))
+                       end = buf->end - phys->offset - phys->displacement;
+
+               index -= phys->offset + phys->displacement;
+
+               if (end - index > BTS_SAFETY_MARGIN)
+                       thresh = end - BTS_SAFETY_MARGIN;
+               else if (end - index > BTS_RECORD_SIZE)
+                       thresh = end - BTS_RECORD_SIZE;
+               else
+                       thresh = end;
+       }
+
+       ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
+       ds->bts_index = ds->bts_buffer_base + index;
+       ds->bts_absolute_maximum = ds->bts_buffer_base + end;
+       ds->bts_interrupt_threshold = !buf->snapshot
+               ? ds->bts_buffer_base + thresh
+               : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
+}
+
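+/* Zero the unused tail of @phys, from @head to the end of the phys buffer. */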
+static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
+{
+       unsigned long index = head - phys->offset;
+
+       memset(page_address(phys->page) + index, 0, phys->size - index);
+}
+
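+/* A non-snapshot buffer is full once it cannot hold another BTS record. */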
+static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
+{
+       if (buf->snapshot)
+               return false;
+
+       if (local_read(&buf->data_size) >= bts->handle.size ||
+           bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
+               return true;
+
+       return false;
+}
+
+static void bts_update(struct bts_ctx *bts)
+{
+       int cpu = raw_smp_processor_id();
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+       unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
+
+       if (!buf)
+               return;
+
+       head = index + bts_buffer_offset(buf, buf->cur_buf);
+       old = local_xchg(&buf->head, head);
+
+       if (!buf->snapshot) {
+               if (old == head)
+                       return;
+
+               if (ds->bts_index >= ds->bts_absolute_maximum)
+                       local_inc(&buf->lost);
+
+               /*
+                * old and head are always in the same physical buffer, so we
+                * can subtract them to get the data size.
+                */
+               local_add(head - old, &buf->data_size);
+       } else {
+               local_set(&buf->data_size, head);
+       }
+}
+
+static void __bts_event_start(struct perf_event *event)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+       u64 config = 0;
+
+       if (!buf || bts_buffer_is_full(buf, bts))
+               return;
+
+       event->hw.itrace_started = 1;
+       event->hw.state = 0;
+
+       if (!buf->snapshot)
+               config |= ARCH_PERFMON_EVENTSEL_INT;
+       if (!event->attr.exclude_kernel)
+               config |= ARCH_PERFMON_EVENTSEL_OS;
+       if (!event->attr.exclude_user)
+               config |= ARCH_PERFMON_EVENTSEL_USR;
+
+       bts_config_buffer(buf);
+
+       /*
+        * Local barrier to make sure the DS configuration is visible
+        * before we enable BTS.
+        */
+       wmb();
+
+       intel_pmu_enable_bts(config);
+}
+
+static void bts_event_start(struct perf_event *event, int flags)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       __bts_event_start(event);
+
+       /* PMI handler: this counter is running and likely generating PMIs */
+       ACCESS_ONCE(bts->started) = 1;
+}
+
+static void __bts_event_stop(struct perf_event *event)
+{
+       /*
+        * No extra synchronization is mandated by the documentation to have
+        * BTS data stores globally visible.
+        */
+       intel_pmu_disable_bts();
+
+       if (event->hw.state & PERF_HES_STOPPED)
+               return;
+
+       ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
+}
+
+static void bts_event_stop(struct perf_event *event, int flags)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       /* PMI handler: don't restart this counter */
+       ACCESS_ONCE(bts->started) = 0;
+
+       __bts_event_stop(event);
+
+       if (flags & PERF_EF_UPDATE)
+               bts_update(bts);
+}
+
+void intel_bts_enable_local(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       if (bts->handle.event && bts->started)
+               __bts_event_start(bts->handle.event);
+}
+
+void intel_bts_disable_local(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       if (bts->handle.event)
+               __bts_event_stop(bts->handle.event);
+}
+
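+/*
+ * Work out how much of the current phys buffer may be filled before the next
+ * PMI, advancing to the next phys buffer when too little usable space is left.
+ */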
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+{
+       unsigned long head, space, next_space, pad, gap, skip, wakeup;
+       unsigned int next_buf;
+       struct bts_phys *phys, *next_phys;
+       int ret;
+
+       if (buf->snapshot)
+               return 0;
+
+       head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+       if (WARN_ON_ONCE(head != local_read(&buf->head)))
+               return -EINVAL;
+
+       phys = &buf->buf[buf->cur_buf];
+       space = phys->offset + phys->displacement + phys->size - head;
+       pad = space;
+       if (space > handle->size) {
+               space = handle->size;
+               space -= space % BTS_RECORD_SIZE;
+       }
+       if (space <= BTS_SAFETY_MARGIN) {
+               /* See if next phys buffer has more space */
+               next_buf = buf->cur_buf + 1;
+               if (next_buf >= buf->nr_bufs)
+                       next_buf = 0;
+               next_phys = &buf->buf[next_buf];
+               gap = buf_size(phys->page) - phys->displacement - phys->size +
+                     next_phys->displacement;
+               skip = pad + gap;
+               if (handle->size >= skip) {
+                       next_space = next_phys->size;
+                       if (next_space + skip > handle->size) {
+                               next_space = handle->size - skip;
+                               next_space -= next_space % BTS_RECORD_SIZE;
+                       }
+                       if (next_space > space || !space) {
+                               if (pad)
+                                       bts_buffer_pad_out(phys, head);
+                               ret = perf_aux_output_skip(handle, skip);
+                               if (ret)
+                                       return ret;
+                               /* Advance to next phys buffer */
+                               phys = next_phys;
+                               space = next_space;
+                               head = phys->offset + phys->displacement;
+                               /*
+                                * After this, cur_buf and head won't match ds
+                                * anymore, so we must not be racing with
+                                * bts_update().
+                                */
+                               buf->cur_buf = next_buf;
+                               local_set(&buf->head, head);
+                       }
+               }
+       }
+
+       /* Don't go far beyond wakeup watermark */
+       wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
+                handle->head;
+       if (space > wakeup) {
+               space = wakeup;
+               space -= space % BTS_RECORD_SIZE;
+       }
+
+       buf->end = head + space;
+
+       /*
+        * If we have no space, the lost notification would have been sent when
+        * we hit absolute_maximum - see bts_update()
+        */
+       if (!space)
+               return -ENOSPC;
+
+       return 0;
+}
+
+int intel_bts_interrupt(void)
+{
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct perf_event *event = bts->handle.event;
+       struct bts_buffer *buf;
+       s64 old_head;
+       int err;
+
+       if (!event || !bts->started)
+               return 0;
+
+       buf = perf_get_aux(&bts->handle);
+       /*
+        * Skip snapshot counters: they don't use the interrupt, but
+        * there's no other way of telling, because the pointer will
+        * keep moving
+        */
+       if (!buf || buf->snapshot)
+               return 0;
+
+       old_head = local_read(&buf->head);
+       bts_update(bts);
+
+       /* no new data */
+       if (old_head == local_read(&buf->head))
+               return 0;
+
+       perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                           !!local_xchg(&buf->lost, 0));
+
+       buf = perf_aux_output_begin(&bts->handle, event);
+       if (!buf)
+               return 1;
+
+       err = bts_buffer_reset(buf, &bts->handle);
+       if (err)
+               perf_aux_output_end(&bts->handle, 0, false);
+
+       return 1;
+}
+
+static void bts_event_del(struct perf_event *event, int mode)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+
+       bts_event_stop(event, PERF_EF_UPDATE);
+
+       if (buf) {
+               if (buf->snapshot)
+                       bts->handle.head =
+                               local_xchg(&buf->data_size,
+                                          buf->nr_pages << PAGE_SHIFT);
+               perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+                                   !!local_xchg(&buf->lost, 0));
+       }
+
+       cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+       cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+       cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+       cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+}
+
+static int bts_event_add(struct perf_event *event, int mode)
+{
+       struct bts_buffer *buf;
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int ret = -EBUSY;
+
+       event->hw.state = PERF_HES_STOPPED;
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+               return -EBUSY;
+
+       if (bts->handle.event)
+               return -EBUSY;
+
+       buf = perf_aux_output_begin(&bts->handle, event);
+       if (!buf)
+               return -EINVAL;
+
+       ret = bts_buffer_reset(buf, &bts->handle);
+       if (ret) {
+               perf_aux_output_end(&bts->handle, 0, false);
+               return ret;
+       }
+
+       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+       if (mode & PERF_EF_START) {
+               bts_event_start(event, 0);
+               if (hwc->state & PERF_HES_STOPPED) {
+                       bts_event_del(event, 0);
+                       return -EBUSY;
+               }
+       }
+
+       return 0;
+}
+
+static void bts_event_destroy(struct perf_event *event)
+{
+       x86_release_hardware();
+       x86_del_exclusive(x86_lbr_exclusive_bts);
+}
+
+static int bts_event_init(struct perf_event *event)
+{
+       int ret;
+
+       if (event->attr.type != bts_pmu.type)
+               return -ENOENT;
+
+       if (x86_add_exclusive(x86_lbr_exclusive_bts))
+               return -EBUSY;
+
+       /*
+        * BTS leaks kernel addresses even when CPL0 tracing is
+        * disabled, so disallow intel_bts driver for unprivileged
+        * users on paranoid systems since it provides trace data
+        * to the user in a zero-copy fashion.
+        *
+        * Note that the default paranoia setting permits unprivileged
+        * users to profile the kernel.
+        */
+       if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
+           !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       ret = x86_reserve_hardware();
+       if (ret) {
+               x86_del_exclusive(x86_lbr_exclusive_bts);
+               return ret;
+       }
+
+       event->destroy = bts_event_destroy;
+
+       return 0;
+}
+
+static void bts_event_read(struct perf_event *event)
+{
+}
+
+static __init int bts_init(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+               return -ENODEV;
+
+       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+       bts_pmu.task_ctx_nr     = perf_sw_context;
+       bts_pmu.event_init      = bts_event_init;
+       bts_pmu.add             = bts_event_add;
+       bts_pmu.del             = bts_event_del;
+       bts_pmu.start           = bts_event_start;
+       bts_pmu.stop            = bts_event_stop;
+       bts_pmu.read            = bts_event_read;
+       bts_pmu.setup_aux       = bts_buffer_setup_aux;
+       bts_pmu.free_aux        = bts_buffer_free_aux;
+
+       return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+}
+arch_initcall(bts_init);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
new file mode 100644 (file)
index 0000000..68fa55b
--- /dev/null
@@ -0,0 +1,3796 @@
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/nmi.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+/*
+ * Intel PerfMon, used on Core and later.
+ */
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
+{
+       [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
+       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
+       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
+       [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
+       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
+       [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
+       [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
+};
+
+static struct event_constraint intel_core_event_constraints[] __read_mostly =
+{
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+       INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+       INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+       INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+       INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
+       INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+       INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+       INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+       INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+       INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+       EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+       INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+       INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
+       INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
+       EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
+{
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_skl_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
+       INTEL_UEVENT_EXTRA_REG(0x01b7,
+                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7,
+                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       /*
+        * Note: the low 8 bits of the eventsel code do not form a contiguous
+        * field; some of those bits #GP when set, so they are masked out.
+        */
+       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+       EVENT_EXTRA_END
+};
+
+EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
+
+struct attribute *nhm_events_attrs[] = {
+       EVENT_PTR(mem_ld_nhm),
+       NULL,
+};
+
+struct attribute *snb_events_attrs[] = {
+       EVENT_PTR(mem_ld_snb),
+       EVENT_PTR(mem_st_snb),
+       NULL,
+};
+
+static struct event_constraint intel_hsw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
+       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
+       INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
+       EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+       return intel_perfmon_event_map[hw_event];
+}
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts.
+ * - icache miss does not include decoded icache
+ */
+
+#define SKL_DEMAND_DATA_RD             BIT_ULL(0)
+#define SKL_DEMAND_RFO                 BIT_ULL(1)
+#define SKL_ANY_RESPONSE               BIT_ULL(16)
+#define SKL_SUPPLIER_NONE              BIT_ULL(17)
+#define SKL_L3_MISS_LOCAL_DRAM         BIT_ULL(26)
+#define SKL_L3_MISS_REMOTE_HOP0_DRAM   BIT_ULL(27)
+#define SKL_L3_MISS_REMOTE_HOP1_DRAM   BIT_ULL(28)
+#define SKL_L3_MISS_REMOTE_HOP2P_DRAM  BIT_ULL(29)
+#define SKL_L3_MISS                    (SKL_L3_MISS_LOCAL_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+#define SKL_SPL_HIT                    BIT_ULL(30)
+#define SKL_SNOOP_NONE                 BIT_ULL(31)
+#define SKL_SNOOP_NOT_NEEDED           BIT_ULL(32)
+#define SKL_SNOOP_MISS                 BIT_ULL(33)
+#define SKL_SNOOP_HIT_NO_FWD           BIT_ULL(34)
+#define SKL_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
+#define SKL_SNOOP_HITM                 BIT_ULL(36)
+#define SKL_SNOOP_NON_DRAM             BIT_ULL(37)
+#define SKL_ANY_SNOOP                  (SKL_SPL_HIT|SKL_SNOOP_NONE| \
+                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                                        SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
+#define SKL_DEMAND_READ                        SKL_DEMAND_DATA_RD
+#define SKL_SNOOP_DRAM                 (SKL_SNOOP_NONE| \
+                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
+                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
+                                        SKL_SNOOP_HITM|SKL_SPL_HIT)
+#define SKL_DEMAND_WRITE               SKL_DEMAND_RFO
+#define SKL_LLC_ACCESS                 SKL_ANY_RESPONSE
+#define SKL_L3_MISS_REMOTE             (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
+                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
+
+static __initconst const u64 skl_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
+               [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 skl_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS|SKL_ANY_SNOOP|
+                                      SKL_SUPPLIER_NONE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS|SKL_ANY_SNOOP|
+                                      SKL_SUPPLIER_NONE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
+                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
+                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+#define SNB_DMND_DATA_RD       (1ULL << 0)
+#define SNB_DMND_RFO           (1ULL << 1)
+#define SNB_DMND_IFETCH                (1ULL << 2)
+#define SNB_DMND_WB            (1ULL << 3)
+#define SNB_PF_DATA_RD         (1ULL << 4)
+#define SNB_PF_RFO             (1ULL << 5)
+#define SNB_PF_IFETCH          (1ULL << 6)
+#define SNB_LLC_DATA_RD                (1ULL << 7)
+#define SNB_LLC_RFO            (1ULL << 8)
+#define SNB_LLC_IFETCH         (1ULL << 9)
+#define SNB_BUS_LOCKS          (1ULL << 10)
+#define SNB_STRM_ST            (1ULL << 11)
+#define SNB_OTHER              (1ULL << 15)
+#define SNB_RESP_ANY           (1ULL << 16)
+#define SNB_NO_SUPP            (1ULL << 17)
+#define SNB_LLC_HITM           (1ULL << 18)
+#define SNB_LLC_HITE           (1ULL << 19)
+#define SNB_LLC_HITS           (1ULL << 20)
+#define SNB_LLC_HITF           (1ULL << 21)
+#define SNB_LOCAL              (1ULL << 22)
+#define SNB_REMOTE             (0xffULL << 23)
+#define SNB_SNP_NONE           (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED     (1ULL << 32)
+#define SNB_SNP_MISS           (1ULL << 33)
+#define SNB_NO_FWD             (1ULL << 34)
+#define SNB_SNP_FWD            (1ULL << 35)
+#define SNB_HITM               (1ULL << 36)
+#define SNB_NON_DRAM           (1ULL << 37)
+
+#define SNB_DMND_READ          (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE         (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+                                SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+                                SNB_HITM)
+
+#define SNB_DRAM_ANY           (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE                (SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS          SNB_RESP_ANY
+#define SNB_L3_MISS            (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+       },
+ },
+};
+
+static __initconst const u64 snb_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
+               [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
+               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+
+};
+
+/*
+ * Notes on the events:
+ * - data reads do not include code reads (comparable to earlier tables)
+ * - data counts include speculative execution (except L1 write, dtlb, bpu)
+ * - remote node access includes remote memory, remote cache, remote mmio.
+ * - prefetches are not included in the counts because they are not
+ *   reliably counted.
+ */
+
+#define HSW_DEMAND_DATA_RD             BIT_ULL(0)
+#define HSW_DEMAND_RFO                 BIT_ULL(1)
+#define HSW_ANY_RESPONSE               BIT_ULL(16)
+#define HSW_SUPPLIER_NONE              BIT_ULL(17)
+#define HSW_L3_MISS_LOCAL_DRAM         BIT_ULL(22)
+#define HSW_L3_MISS_REMOTE_HOP0                BIT_ULL(27)
+#define HSW_L3_MISS_REMOTE_HOP1                BIT_ULL(28)
+#define HSW_L3_MISS_REMOTE_HOP2P       BIT_ULL(29)
+#define HSW_L3_MISS                    (HSW_L3_MISS_LOCAL_DRAM| \
+                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+                                        HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_SNOOP_NONE                 BIT_ULL(31)
+#define HSW_SNOOP_NOT_NEEDED           BIT_ULL(32)
+#define HSW_SNOOP_MISS                 BIT_ULL(33)
+#define HSW_SNOOP_HIT_NO_FWD           BIT_ULL(34)
+#define HSW_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
+#define HSW_SNOOP_HITM                 BIT_ULL(36)
+#define HSW_SNOOP_NON_DRAM             BIT_ULL(37)
+#define HSW_ANY_SNOOP                  (HSW_SNOOP_NONE| \
+                                        HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
+                                        HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
+                                        HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
+#define HSW_SNOOP_DRAM                 (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
+#define HSW_DEMAND_READ                        HSW_DEMAND_DATA_RD
+#define HSW_DEMAND_WRITE               HSW_DEMAND_RFO
+#define HSW_L3_MISS_REMOTE             (HSW_L3_MISS_REMOTE_HOP0|\
+                                        HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
+#define HSW_LLC_ACCESS                 HSW_ANY_RESPONSE
+
+#define BDW_L3_MISS_LOCAL              BIT(26)
+#define BDW_L3_MISS                    (BDW_L3_MISS_LOCAL| \
+                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
+                                        HSW_L3_MISS_REMOTE_HOP2P)
+
+
+static __initconst const u64 hsw_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
+               [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
+               [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
+               [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
+               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 hsw_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+                                      HSW_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS|HSW_ANY_SNOOP,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+                                      HSW_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS|HSW_ANY_SNOOP,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS_LOCAL_DRAM|
+                                      HSW_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
+                                      HSW_L3_MISS_REMOTE|
+                                      HSW_SNOOP_DRAM,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS_LOCAL_DRAM|
+                                      HSW_SNOOP_DRAM,
+               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
+                                      HSW_L3_MISS_REMOTE|
+                                      HSW_SNOOP_DRAM,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+};
+
+static __initconst const u64 westmere_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+};
+
+/*
+ * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
+ * See IA32 SDM Vol 3B 30.6.1.3
+ */
+
+#define NHM_DMND_DATA_RD       (1 << 0)
+#define NHM_DMND_RFO           (1 << 1)
+#define NHM_DMND_IFETCH                (1 << 2)
+#define NHM_DMND_WB            (1 << 3)
+#define NHM_PF_DATA_RD         (1 << 4)
+#define NHM_PF_DATA_RFO                (1 << 5)
+#define NHM_PF_IFETCH          (1 << 6)
+#define NHM_OFFCORE_OTHER      (1 << 7)
+#define NHM_UNCORE_HIT         (1 << 8)
+#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
+#define NHM_OTHER_CORE_HITM    (1 << 10)
+                               /* reserved */
+#define NHM_REMOTE_CACHE_FWD   (1 << 12)
+#define NHM_REMOTE_DRAM                (1 << 13)
+#define NHM_LOCAL_DRAM         (1 << 14)
+#define NHM_NON_DRAM           (1 << 15)
+
+#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE             (NHM_REMOTE_DRAM)
+
+#define NHM_DMND_READ          (NHM_DMND_DATA_RD)
+#define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
+#define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
+
+#define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
+#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
+
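+/*
+ * For example, the LL read-miss entry below resolves to
+ * NHM_DMND_READ|NHM_L3_MISS == 0xf001, the value programmed into the
+ * OFFCORE_RESPONSE MSR, while the event itself stays 0x01b7 (OFFCORE_RESPONSE).
+ */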
+static __initconst const u64 nehalem_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
+       },
+ },
+};
+
+static __initconst const u64 nehalem_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+               [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+};
+
+static __initconst const u64 core2_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+               [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+               [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+               [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+               [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static __initconst const u64 atom_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+               [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+               [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
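+/* Silvermont reuses the SNB offcore response bit layout */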
+#define SLM_DMND_READ          SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE         SNB_DMND_RFO
+#define SLM_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS         SNB_RESP_ANY
+#define SLM_LLC_MISS           (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+               [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+       },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+       [ C(OP_PREFETCH) ] = {
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+               [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+#define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
+#define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
+#define KNL_MCDRAM_LOCAL       BIT_ULL(21)
+#define KNL_MCDRAM_FAR         BIT_ULL(22)
+#define KNL_DDR_LOCAL          BIT_ULL(23)
+#define KNL_DDR_FAR            BIT_ULL(24)
+#define KNL_DRAM_ANY           (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
+                                   KNL_DDR_LOCAL | KNL_DDR_FAR)
+#define KNL_L2_READ            SLM_DMND_READ
+#define KNL_L2_WRITE           SLM_DMND_WRITE
+#define KNL_L2_PREFETCH                SLM_DMND_PREFETCH
+#define KNL_L2_ACCESS          SLM_LLC_ACCESS
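+/*
+ * A "miss" here is anything not served by the local tile's L2: other-tile
+ * L2 hits, any MCDRAM/DDR access, and any snoop or non-DRAM response.
+ */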
+#define KNL_L2_MISS            (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
+                                  KNL_DRAM_ANY | SNB_SNP_ANY | \
+                                                 SNB_NON_DRAM)
+
+static __initconst const u64 knl_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = 0,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
+                       [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
+               },
+       },
+};
+
+/*
+ * Used from PMIs where the LBRs are already disabled.
+ *
+ * This function may be called consecutively; the PMU must remain in the
+ * disabled state across such calls.
+ *
+ * Consecutive calls write the same disable value to the related registers,
+ * so the PMU state remains unchanged, and hw.state in
+ * intel_bts_disable_local() likewise stays PERF_HES_STOPPED.
+ */
+static void __intel_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+               intel_pmu_disable_bts();
+       else
+               intel_bts_disable_local();
+
+       intel_pmu_pebs_disable_all();
+}
+
+static void intel_pmu_disable_all(void)
+{
+       __intel_pmu_disable_all();
+       intel_pmu_lbr_disable_all();
+}
+
+static void __intel_pmu_enable_all(int added, bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       intel_pmu_pebs_enable_all();
+       intel_pmu_lbr_enable_all(pmi);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+                       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
+
+       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+               struct perf_event *event =
+                       cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+
+               if (WARN_ON_ONCE(!event))
+                       return;
+
+               intel_pmu_enable_bts(event->hw.config);
+       } else
+               intel_bts_enable_local();
+}
+
+static void intel_pmu_enable_all(int added)
+{
+       __intel_pmu_enable_all(added, false);
+}
+
+/*
+ * Workaround for:
+ *   Intel Errata AAK100 (model 26)
+ *   Intel Errata AAP53  (model 30)
+ *   Intel Errata BD53   (model 44)
+ *
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
+ */
+static void intel_pmu_nhm_workaround(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       static const unsigned long nhm_magic[4] = {
+               0x4300B5,
+               0x4300D2,
+               0x4300B1,
+               0x4300B1
+       };
+       struct perf_event *event;
+       int i;
+
+       /*
+        * The erratum requires the following steps:
+        * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+        * 2) Configure 4 PERFEVTSELx registers with the magic events and clear
+        *    the corresponding PMCx;
+        * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
+        * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+        * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
+        */
+
+       /*
+        * The steps we actually take differ slightly from the above:
+        * A) To reduce MSR operations, we skip step 1) because those MSRs
+        *    are already cleared before this function is called;
+        * B) Call x86_perf_event_update() to save PMCx before configuring
+        *    PERFEVTSELx with the magic number;
+        * C) For step 5), only clear a PERFEVTSELx/PMCx pair if the
+        *    PERFEVTSELx is not currently in use;
+        * D) Call x86_perf_event_set_period() to restore PMCx.
+        */
+
+       /* We always operate 4 pairs of PERF Counters */
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
+               if (event)
+                       x86_perf_event_update(event);
+       }
+
+       for (i = 0; i < 4; i++) {
+               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+               wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+       }
+
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
+
+               if (event) {
+                       x86_perf_event_set_period(event);
+                       __x86_pmu_enable_event(&event->hw,
+                                       ARCH_PERFMON_EVENTSEL_ENABLE);
+               } else
+                       wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+       }
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+       if (added)
+               intel_pmu_nhm_workaround();
+       intel_pmu_enable_all(added);
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+       u64 status;
+
+       rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+       return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+       wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+{
+       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+       u64 ctrl_val, mask;
+
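+       /*
+        * Each fixed counter owns a 4-bit control field in
+        * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clear this counter's field.
+        */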
+       mask = 0xfULL << (idx * 4);
+
+       rdmsrl(hwc->config_base, ctrl_val);
+       ctrl_val &= ~mask;
+       wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static void intel_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+               intel_pmu_disable_bts();
+               intel_pmu_drain_bts_buffer();
+               return;
+       }
+
+       cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+
+       /*
+        * The LBR must be disabled before the actual event,
+        * because any event may be combined with LBR.
+        */
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_disable(event);
+
+       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+               intel_pmu_disable_fixed(hwc);
+               return;
+       }
+
+       x86_pmu_disable_event(event);
+
+       if (unlikely(event->attr.precise_ip))
+               intel_pmu_pebs_disable(event);
+}
+
+static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+{
+       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+       u64 ctrl_val, bits, mask;
+
+       /*
+        * Enable IRQ generation (0x8),
+        * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+        * if requested:
+        */
+       bits = 0x8ULL;
+       if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+               bits |= 0x2;
+       if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+               bits |= 0x1;
+
+       /*
+        * ANY bit is supported in v3 and up
+        */
+       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+               bits |= 0x4;
+
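+       /*
+        * Shift the control bits into this counter's 4-bit field;
+        * e.g. fixed counter 1 is controlled by bits 7:4 of FIXED_CTR_CTRL.
+        */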
+       bits <<= (idx * 4);
+       mask = 0xfULL << (idx * 4);
+
+       rdmsrl(hwc->config_base, ctrl_val);
+       ctrl_val &= ~mask;
+       ctrl_val |= bits;
+       wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+               if (!__this_cpu_read(cpu_hw_events.enabled))
+                       return;
+
+               intel_pmu_enable_bts(hwc->config);
+               return;
+       }
+       /*
+        * The LBR must be enabled before the actual event,
+        * because any event may be combined with LBR.
+        */
+       if (needs_branch_stack(event))
+               intel_pmu_lbr_enable(event);
+
+       if (event->attr.exclude_host)
+               cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
+       if (event->attr.exclude_guest)
+               cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+
+       if (unlikely(event_is_checkpointed(event)))
+               cpuc->intel_cp_status |= (1ull << hwc->idx);
+
+       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+               intel_pmu_enable_fixed(hwc);
+               return;
+       }
+
+       if (unlikely(event->attr.precise_ip))
+               intel_pmu_pebs_enable(event);
+
+       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+}
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+int intel_pmu_save_and_restart(struct perf_event *event)
+{
+       x86_perf_event_update(event);
+       /*
+        * For a checkpointed counter always reset back to 0.  This
+        * avoids a situation where the counter overflows, aborts the
+        * transaction and is then set back to shortly before the
+        * overflow, and overflows and aborts again.
+        */
+       if (unlikely(event_is_checkpointed(event))) {
+               /* No race with NMIs because the counter should not be armed */
+               wrmsrl(event->hw.event_base, 0);
+               local64_set(&event->hw.prev_count, 0);
+       }
+       return x86_perf_event_set_period(event);
+}
+
+static void intel_pmu_reset(void)
+{
+       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+       unsigned long flags;
+       int idx;
+
+       if (!x86_pmu.num_counters)
+               return;
+
+       local_irq_save(flags);
+
+       pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+               wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
+       }
+       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
+               wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+
+       if (ds)
+               ds->bts_index = ds->bts_buffer_base;
+
+       /* Ack all overflows and disable fixed counters */
+       if (x86_pmu.version >= 2) {
+               intel_pmu_ack_status(intel_pmu_get_status());
+               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+       }
+
+       /* Reset LBRs and LBR freezing */
+       if (x86_pmu.lbr_nr) {
+               update_debugctlmsr(get_debugctlmsr() &
+                       ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
+       }
+
+       local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       int bit, loops;
+       u64 status;
+       int handled;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * There is no known reason not to always do a late ACK,
+        * but just in case, make it opt-in.
+        */
+       if (!x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
+       __intel_pmu_disable_all();
+       handled = intel_pmu_drain_bts_buffer();
+       handled += intel_bts_interrupt();
+       status = intel_pmu_get_status();
+       if (!status)
+               goto done;
+
+       loops = 0;
+again:
+       intel_pmu_lbr_read();
+       intel_pmu_ack_status(status);
+       if (++loops > 100) {
+               static bool warned = false;
+               if (!warned) {
+                       WARN(1, "perfevents: irq loop stuck!\n");
+                       perf_event_print_debug();
+                       warned = true;
+               }
+               intel_pmu_reset();
+               goto done;
+       }
+
+       inc_irq_stat(apic_perf_irqs);
+
+
+       /*
+        * Ignore a range of extra bits in status that do not indicate
+        * overflow by themselves.
+        */
+       status &= ~(GLOBAL_STATUS_COND_CHG |
+                   GLOBAL_STATUS_ASIF |
+                   GLOBAL_STATUS_LBRS_FROZEN);
+       if (!status)
+               goto done;
+
+       /*
+        * PEBS overflow sets bit 62 in the global status register
+        */
+       if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+               handled++;
+               x86_pmu.drain_pebs(regs);
+               /*
+                * There are cases where, even though the PEBS ovfl bit is set
+                * in GLOBAL_OVF_STATUS, the PEBS events may also have their
+                * overflow bits set for their counters. We must clear them
+                * here because they have been processed as exact samples in
+                * the drain_pebs() routine. They must not be processed again
+                * in the for_each_set_bit() loop for regular samples below.
+                */
+               status &= ~cpuc->pebs_enabled;
+               status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+       }
+
+       /*
+        * Intel PT
+        */
+       if (__test_and_clear_bit(55, (unsigned long *)&status)) {
+               handled++;
+               intel_pt_interrupt();
+       }
+
+       /*
+        * Checkpointed counters can lead to 'spurious' PMIs because the
+        * rollback caused by the PMI will have cleared the overflow status
+        * bit. Therefore always force probe these counters.
+        */
+       status |= cpuc->intel_cp_status;
+
+       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+               struct perf_event *event = cpuc->events[bit];
+
+               handled++;
+
+               if (!test_bit(bit, cpuc->active_mask))
+                       continue;
+
+               if (!intel_pmu_save_and_restart(event))
+                       continue;
+
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (has_branch_stack(event))
+                       data.br_stack = &cpuc->lbr_stack;
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       /*
+        * Repeat if there is more work to be done:
+        */
+       status = intel_pmu_get_status();
+       if (status)
+               goto again;
+
+done:
+       /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+       if (cpuc->enabled)
+               __intel_pmu_enable_all(0, true);
+
+       /*
+        * Only unmask the NMI after the overflow counters
+        * have been reset. This avoids spurious NMIs on
+        * Haswell CPUs.
+        */
+       if (x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
+       return handled;
+}
+
+static struct event_constraint *
+intel_bts_constraints(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned int hw_event, bts_event;
+
+       if (event->attr.freq)
+               return NULL;
+
+       hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+       bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+       if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+               return &bts_constraint;
+
+       return NULL;
+}
+
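+/*
+ * On PMUs with a second OFFCORE_RSP MSR, an offcore response event may be
+ * moved to the other extra register. Return the alternate index when the
+ * config is also valid there, otherwise keep the original index.
+ */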
+static int intel_alt_er(int idx, u64 config)
+{
+       int alt_idx = idx;
+
+       if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
+               return idx;
+
+       if (idx == EXTRA_REG_RSP_0)
+               alt_idx = EXTRA_REG_RSP_1;
+
+       if (idx == EXTRA_REG_RSP_1)
+               alt_idx = EXTRA_REG_RSP_0;
+
+       if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
+               return idx;
+
+       return alt_idx;
+}
+
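+/*
+ * Rewire an offcore response event onto the extra register selected by
+ * @idx: the event code and the target OFFCORE_RSP MSR are changed together
+ * (see the extra_regs tables above, which define OFFCORE_RSP_X first).
+ */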
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+       event->hw.extra_reg.idx = idx;
+
+       if (idx == EXTRA_REG_RSP_0) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+       } else if (idx == EXTRA_REG_RSP_1) {
+               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
+               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+       }
+}
+
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
+static struct event_constraint *
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+                                  struct perf_event *event,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct event_constraint *c = &emptyconstraint;
+       struct er_account *era;
+       unsigned long flags;
+       int idx = reg->idx;
+
+       /*
+        * reg->alloc can be set due to existing state, so for fake cpuc we
+        * need to ignore this, otherwise we might fail to allocate proper fake
+        * state for this extra reg constraint. Also see the comment below.
+        */
+       if (reg->alloc && !cpuc->is_fake)
+               return NULL; /* call x86_get_event_constraint() */
+
+again:
+       era = &cpuc->shared_regs->regs[idx];
+       /*
+        * we use spin_lock_irqsave() to avoid lockdep issues when
+        * passing a fake cpuc
+        */
+       raw_spin_lock_irqsave(&era->lock, flags);
+
+       if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+               /*
+                * If it's a fake cpuc -- as per validate_{group,event}() we
+                * shouldn't touch event state and we can avoid doing so
+                * since both will only call get_event_constraints() once
+                * on each event, this avoids the need for reg->alloc.
+                *
+                * Not doing the ER fixup will only result in era->reg being
+                * wrong, but since we won't actually try and program hardware
+                * this isn't a problem either.
+                */
+               if (!cpuc->is_fake) {
+                       if (idx != reg->idx)
+                               intel_fixup_er(event, idx);
+
+                       /*
+                        * x86_schedule_events() can call get_event_constraints()
+                        * multiple times on events in the case of incremental
+                        * scheduling. reg->alloc ensures we only do the ER
+                        * allocation once.
+                        */
+                       reg->alloc = 1;
+               }
+
+               /* lock in msr value */
+               era->config = reg->config;
+               era->reg = reg->reg;
+
+               /* one more user */
+               atomic_inc(&era->ref);
+
+               /*
+                * need to call x86_get_event_constraint()
+                * to check if associated event has constraints
+                */
+               c = NULL;
+       } else {
+               idx = intel_alt_er(idx, reg->config);
+               if (idx != reg->idx) {
+                       raw_spin_unlock_irqrestore(&era->lock, flags);
+                       goto again;
+               }
+       }
+       raw_spin_unlock_irqrestore(&era->lock, flags);
+
+       return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct er_account *era;
+
+       /*
+        * Only put the constraint if the extra reg was actually allocated. This
+        * also takes care of events which do not use an extra shared reg.
+        *
+        * Also, if this is a fake cpuc we shouldn't touch any event state
+        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+        * either since it'll be thrown out.
+        */
+       if (!reg->alloc || cpuc->is_fake)
+               return;
+
+       era = &cpuc->shared_regs->regs[reg->idx];
+
+       /* one fewer user */
+       atomic_dec(&era->ref);
+
+       /* allocate again next time */
+       reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+                             struct perf_event *event)
+{
+       struct event_constraint *c = NULL, *d;
+       struct hw_perf_event_extra *xreg, *breg;
+
+       xreg = &event->hw.extra_reg;
+       if (xreg->idx != EXTRA_REG_NONE) {
+               c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+               if (c == &emptyconstraint)
+                       return c;
+       }
+       breg = &event->hw.branch_reg;
+       if (breg->idx != EXTRA_REG_NONE) {
+               d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+               if (d == &emptyconstraint) {
+                       __intel_shared_reg_put_constraints(cpuc, xreg);
+                       c = d;
+               }
+       }
+       return c;
+}
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       if (x86_pmu.event_constraints) {
+               for_each_event_constraint(c, x86_pmu.event_constraints) {
+                       if ((event->hw.config & c->cmask) == c->code) {
+                               event->hw.flags |= c->flags;
+                               return c;
+                       }
+               }
+       }
+
+       return &unconstrained;
+}
+
+static struct event_constraint *
+__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       c = intel_bts_constraints(event);
+       if (c)
+               return c;
+
+       c = intel_shared_regs_constraints(cpuc, event);
+       if (c)
+               return c;
+
+       c = intel_pebs_constraints(event);
+       if (c)
+               return c;
+
+       return x86_get_event_constraints(cpuc, idx, event);
+}
+
+static void
+intel_start_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       xl->sched_started = true;
+       /*
+        * Lock the shared state until we are done scheduling in
+        * intel_stop_scheduling(); this makes scheduling appear
+        * as a transaction.
+        */
+       raw_spin_lock(&excl_cntrs->lock);
+}
+
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct event_constraint *c = cpuc->event_constraint[idx];
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       lockdep_assert_held(&excl_cntrs->lock);
+
+       if (c->flags & PERF_X86_EVENT_EXCL)
+               xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
+       else
+               xl->state[cntr] = INTEL_EXCL_SHARED;
+}
+
+static void
+intel_stop_scheduling(struct cpu_hw_events *cpuc)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xl;
+       int tid = cpuc->excl_thread_id;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return;
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       xl = &excl_cntrs->states[tid];
+
+       xl->sched_started = false;
+       /*
+        * release shared state lock (acquired in intel_start_scheduling())
+        */
+       raw_spin_unlock(&excl_cntrs->lock);
+}
+
+static struct event_constraint *
+intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+                          int idx, struct event_constraint *c)
+{
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       struct intel_excl_states *xlo;
+       int tid = cpuc->excl_thread_id;
+       int is_excl, i;
+
+       /*
+        * validating a group does not require
+        * enforcing cross-thread exclusion
+        */
+       if (cpuc->is_fake || !is_ht_workaround_enabled())
+               return c;
+
+       /*
+        * no exclusion needed
+        */
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return c;
+
+       /*
+        * because we modify the constraint, we need
+        * to make a copy. Static constraints come
+        * from static const tables.
+        *
+        * only needed when constraint has not yet
+        * been cloned (marked dynamic)
+        */
+       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+               struct event_constraint *cx;
+
+               /*
+                * grab pre-allocated constraint entry
+                */
+               cx = &cpuc->constraint_list[idx];
+
+               /*
+                * initialize dynamic constraint
+                * with static constraint
+                */
+               *cx = *c;
+
+               /*
+                * mark constraint as dynamic, so we
+                * can free it later on
+                */
+               cx->flags |= PERF_X86_EVENT_DYNAMIC;
+               c = cx;
+       }
+
+       /*
+        * From here on, the constraint is dynamic.
+        * Either it was just allocated above, or it
+        * was allocated during an earlier invocation
+        * of this function.
+        */
+
+       /*
+        * state of sibling HT
+        */
+       xlo = &excl_cntrs->states[tid ^ 1];
+
+       /*
+        * event requires exclusive counter access
+        * across HT threads
+        */
+       is_excl = c->flags & PERF_X86_EVENT_EXCL;
+       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+               if (!cpuc->n_excl++)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+       }
+
+       /*
+        * Modify static constraint with current dynamic
+        * state of thread
+        *
+        * EXCLUSIVE: sibling counter measuring exclusive event
+        * SHARED   : sibling counter measuring non-exclusive event
+        * UNUSED   : sibling counter unused
+        */
+       for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
+               /*
+                * If the sibling counter is measuring an exclusive event,
+                * our corresponding counter cannot be used,
+                * regardless of our event.
+                */
+               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+                       __clear_bit(i, c->idxmsk);
+               /*
+                * If we are measuring an exclusive event and the sibling
+                * is measuring a non-exclusive event, then our counter
+                * cannot be used.
+                */
+               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+                       __clear_bit(i, c->idxmsk);
+       }
+
+       /*
+        * recompute actual bit weight for scheduling algorithm
+        */
+       c->weight = hweight64(c->idxmsk64);
+
+       /*
+        * if we return an empty mask, then switch
+        * back to static empty constraint to avoid
+        * the cost of freeing later on
+        */
+       if (c->weight == 0)
+               c = &emptyconstraint;
+
+       return c;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                           struct perf_event *event)
+{
+       struct event_constraint *c1 = NULL;
+       struct event_constraint *c2;
+
+       if (idx >= 0) /* fake does < 0 */
+               c1 = cpuc->event_constraint[idx];
+
+       /*
+        * first time only
+        * - static constraint: no change across incremental scheduling calls
+        * - dynamic constraint: handled by intel_get_excl_constraints()
+        */
+       c2 = __intel_get_event_constraints(cpuc, idx, event);
+       if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+               bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
+               c1->weight = c2->weight;
+               c2 = c1;
+       }
+
+       if (cpuc->excl_cntrs)
+               return intel_get_excl_constraints(cpuc, event, idx, c2);
+
+       return c2;
+}
+
+static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
+               struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+       int tid = cpuc->excl_thread_id;
+       struct intel_excl_states *xl;
+
+       /*
+        * nothing needed if in group validation mode
+        */
+       if (cpuc->is_fake)
+               return;
+
+       if (WARN_ON_ONCE(!excl_cntrs))
+               return;
+
+       if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+               hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+               if (!--cpuc->n_excl)
+                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+       }
+
+       /*
+        * If event was actually assigned, then mark the counter state as
+        * unused now.
+        */
+       if (hwc->idx >= 0) {
+               xl = &excl_cntrs->states[tid];
+
+               /*
+                * put_constraint may be called from x86_schedule_events(),
+                * which already has the lock held, so make locking
+                * conditional here.
+                */
+               if (!xl->sched_started)
+                       raw_spin_lock(&excl_cntrs->lock);
+
+               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
+
+               if (!xl->sched_started)
+                       raw_spin_unlock(&excl_cntrs->lock);
+       }
+}
+
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+
+       reg = &event->hw.extra_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+
+       reg = &event->hw.branch_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+}
+
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       intel_put_shared_regs_event_constraints(cpuc, event);
+
+       /*
+        * If the PMU has exclusive counter restrictions, then
+        * all events are subject to them and must call the
+        * put_excl_constraints() routine.
+        */
+       if (cpuc->excl_cntrs)
+               intel_put_excl_constraints(cpuc, event);
+}
+
+static void intel_pebs_aliases_core2(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.ANY_P
+                * (0x00c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * INST_RETIRED.ANY_P counts the number of cycles that retire
+                * CNTMASK instructions. By setting CNTMASK to a value (16)
+                * larger than the maximum number of instructions that can be
+                * retired per cycle (4) and then inverting the condition, we
+                * count all cycles that retire 16 or fewer instructions, which
+                * is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
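
For reference, the alias built above collapses to a single raw EVENTSEL value; a minimal sketch assuming the architectural layout (event select in bits 0-7, INV at bit 23, CMASK in bits 24-31) — the helper macros here are hypothetical, not kernel API:

    /* Illustrative only: the PEBS-capable cycles alias as a raw EVENTSEL value. */
    #define EVTSEL_EVENT(e)         ((u64)(e) & 0xffULL)
    #define EVTSEL_INV              (1ULL << 23)
    #define EVTSEL_CMASK(c)         (((u64)(c) & 0xffULL) << 24)

    /* INST_RETIRED.ANY_P (0xc0) with cmask=16, inv=1 resolves to 0x108000c0. */
    static const u64 pebs_cycles_alias =
            EVTSEL_EVENT(0xc0) | EVTSEL_INV | EVTSEL_CMASK(16);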
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use UOPS_RETIRED.ALL
+                * (0x01c2), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * UOPS_RETIRED.ALL counts the number of cycles that retire
+                * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+                * larger than the maximum number of micro-ops that can be
+                * retired per cycle (4) and then inverting the condition, we
+                * count all cycles that retire 16 or fewer micro-ops, which
+                * is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
+
+static void intel_pebs_aliases_precdist(struct perf_event *event)
+{
+       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.PREC_DIST
+                * (0x01c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * The PREC_DIST event has special support to minimize sample
+                * shadowing effects. One drawback is that it can only be
+                * programmed on counter 1, but that seems like an
+                * acceptable trade-off.
+                */
+               u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+}
+
+static void intel_pebs_aliases_ivb(struct perf_event *event)
+{
+       if (event->attr.precise_ip < 3)
+               return intel_pebs_aliases_snb(event);
+       return intel_pebs_aliases_precdist(event);
+}
+
+static void intel_pebs_aliases_skl(struct perf_event *event)
+{
+       if (event->attr.precise_ip < 3)
+               return intel_pebs_aliases_core2(event);
+       return intel_pebs_aliases_precdist(event);
+}
+
+static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+{
+       unsigned long flags = x86_pmu.free_running_flags;
+
+       if (event->attr.use_clockid)
+               flags &= ~PERF_SAMPLE_TIME;
+       return flags;
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+       int ret = x86_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+
+       if (event->attr.precise_ip) {
+               if (!event->attr.freq) {
+                       event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+                       if (!(event->attr.sample_type &
+                             ~intel_pmu_free_running_flags(event)))
+                               event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+               }
+               if (x86_pmu.pebs_aliases)
+                       x86_pmu.pebs_aliases(event);
+       }
+
+       if (needs_branch_stack(event)) {
+               ret = intel_pmu_setup_lbr_filter(event);
+               if (ret)
+                       return ret;
+
+               /*
+                * BTS is set up earlier in this path, so don't account twice
+                */
+               if (!intel_pmu_has_bts(event)) {
+                       /* disallow lbr if conflicting events are present */
+                       if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+                               return -EBUSY;
+
+                       event->destroy = hw_perf_lbr_event_destroy;
+               }
+       }
+
+       if (event->attr.type != PERF_TYPE_RAW)
+               return 0;
+
+       if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
+               return 0;
+
+       if (x86_pmu.version < 3)
+               return -EINVAL;
+
+       if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
+
+       return 0;
+}
+
+struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+{
+       if (x86_pmu.guest_get_msrs)
+               return x86_pmu.guest_get_msrs(nr);
+       *nr = 0;
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+
+       arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
+       arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
+       arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
+       /*
+        * If a PMU counter has PEBS enabled, it is not enough to disable the
+        * counter on guest entry, since a PEBS memory write can overshoot the
+        * guest entry and corrupt guest memory. Disabling PEBS solves the problem.
+        */
+       arr[1].msr = MSR_IA32_PEBS_ENABLE;
+       arr[1].host = cpuc->pebs_enabled;
+       arr[1].guest = 0;
+
+       *nr = 2;
+       return arr;
+}
+
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
+               struct perf_event *event = cpuc->events[idx];
+
+               arr[idx].msr = x86_pmu_config_addr(idx);
+               arr[idx].host = arr[idx].guest = 0;
+
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+
+               arr[idx].host = arr[idx].guest =
+                       event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
+
+               if (event->attr.exclude_host)
+                       arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+               else if (event->attr.exclude_guest)
+                       arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+       }
+
+       *nr = x86_pmu.num_counters;
+       return arr;
+}
+
+static void core_pmu_enable_event(struct perf_event *event)
+{
+       if (!event->attr.exclude_host)
+               x86_pmu_enable_event(event);
+}
+
+static void core_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
+
+               if (!test_bit(idx, cpuc->active_mask) ||
+                               cpuc->events[idx]->attr.exclude_host)
+                       continue;
+
+               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+       }
+}
+
+static int hsw_hw_config(struct perf_event *event)
+{
+       int ret = intel_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+               return 0;
+       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+       /*
+        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+        * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
+        * this combination.
+        */
+       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+             event->attr.precise_ip > 0))
+               return -EOPNOTSUPP;
+
+       if (event_is_checkpointed(event)) {
+               /*
+                * Sampling of checkpointed events can cause situations where
+                * the CPU constantly aborts because of an overflow, which is
+                * then checkpointed back and ignored. Forbid checkpointing
+                * for sampling.
+                *
+                * But still allow a long sampling period, so that perf stat
+                * from KVM works.
+                */
+               if (event->attr.sample_period > 0 &&
+                   event->attr.sample_period < 0x7fffffff)
+                       return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
+static struct event_constraint counter2_constraint =
+                       EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       c = intel_get_event_constraints(cpuc, idx, event);
+
+       /* Handle the special quirk: in_tx_checkpointed only works in counter 2 */
+       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+               if (c->idxmsk64 & (1U << 2))
+                       return &counter2_constraint;
+               return &emptyconstraint;
+       }
+
+       return c;
+}
+
+/*
+ * Broadwell:
+ *
+ * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
+ * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
+ * the two to enforce a minimum period of 128 (the smallest value that has bits
+ * 0-5 cleared and >= 100).
+ *
+ * Because of how the code in x86_perf_event_set_period() works, the truncation
+ * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
+ * to make up for the 'lost' events due to carrying the 'error' in period_left.
+ *
+ * Therefore the effective (average) period matches the requested period,
+ * despite coarser hardware granularity.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+       if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
+                       X86_CONFIG(.event=0xc0, .umask=0x01)) {
+               if (left < 128)
+                       left = 128;
+               left &= ~0x3fu;
+       }
+       return left;
+}
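
A quick worked check of the clamp above, with illustrative requested periods (assuming the event matches INST_RETIRED.PREC_DIST):

    /*
     * bdw_limit_period(event, 100)  -> 128   (raised to the minimum period)
     * bdw_limit_period(event, 200)  -> 192   (rounded down to a multiple of 64)
     * bdw_limit_period(event, 4096) -> 4096  (bits 0-5 already clear and >= 128)
     */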
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(pc,    "config:19"     );
+PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
+
+static struct attribute *intel_arch_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+ssize_t intel_event_sysfs_show(char *page, u64 config)
+{
+       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+       return x86_event_sysfs_show(page, config, event);
+}
+
+struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       struct intel_shared_regs *regs;
+       int i;
+
+       regs = kzalloc_node(sizeof(struct intel_shared_regs),
+                           GFP_KERNEL, cpu_to_node(cpu));
+       if (regs) {
+               /*
+                * initialize the locks to keep lockdep happy
+                */
+               for (i = 0; i < EXTRA_REG_MAX; i++)
+                       raw_spin_lock_init(&regs->regs[i].lock);
+
+               regs->core_id = -1;
+       }
+       return regs;
+}
+
+static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+{
+       struct intel_excl_cntrs *c;
+
+       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
+                        GFP_KERNEL, cpu_to_node(cpu));
+       if (c) {
+               raw_spin_lock_init(&c->lock);
+               c->core_id = -1;
+       }
+       return c;
+}
+
+static int intel_pmu_cpu_prepare(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto err;
+       }
+
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
+
+               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+               if (!cpuc->constraint_list)
+                       goto err_shared_regs;
+
+               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+               if (!cpuc->excl_cntrs)
+                       goto err_constraint_list;
+
+               cpuc->excl_thread_id = 0;
+       }
+
+       return NOTIFY_OK;
+
+err_constraint_list:
+       kfree(cpuc->constraint_list);
+       cpuc->constraint_list = NULL;
+
+err_shared_regs:
+       kfree(cpuc->shared_regs);
+       cpuc->shared_regs = NULL;
+
+err:
+       return NOTIFY_BAD;
+}
+
+static void intel_pmu_cpu_starting(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int core_id = topology_core_id(cpu);
+       int i;
+
+       init_debug_store_on_cpu(cpu);
+       /*
+        * Deal with CPUs that don't clear their LBRs on power-up.
+        */
+       intel_pmu_lbr_reset();
+
+       cpuc->lbr_sel = NULL;
+
+       if (!cpuc->shared_regs)
+               return;
+
+       if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+                       struct intel_shared_regs *pc;
+
+                       pc = per_cpu(cpu_hw_events, i).shared_regs;
+                       if (pc && pc->core_id == core_id) {
+                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
+                               cpuc->shared_regs = pc;
+                               break;
+                       }
+               }
+               cpuc->shared_regs->core_id = core_id;
+               cpuc->shared_regs->refcnt++;
+       }
+
+       if (x86_pmu.lbr_sel_map)
+               cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
+
+       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+                       struct intel_excl_cntrs *c;
+
+                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
+                       if (c && c->core_id == core_id) {
+                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
+                               cpuc->excl_cntrs = c;
+                               cpuc->excl_thread_id = 1;
+                               break;
+                       }
+               }
+               cpuc->excl_cntrs->core_id = core_id;
+               cpuc->excl_cntrs->refcnt++;
+       }
+}
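
To make the sibling hand-off above concrete, a sketch with hypothetical CPU numbering (CPUs 2 and 6 as HT siblings of one core, assuming the PMU allows cross-HT sharing):

    /*
     * Illustrative only:
     *   CPU 2 onlines first : keeps its freshly allocated shared_regs /
     *                         excl_cntrs, excl_thread_id == 0
     *   CPU 6 onlines second: finds CPU 2's structures via
     *                         topology_sibling_cpumask(), adopts them and
     *                         bumps refcnt, parks its own allocations on
     *                         kfree_on_online[], excl_thread_id == 1
     */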
+
+static void free_excl_cntrs(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       struct intel_excl_cntrs *c;
+
+       c = cpuc->excl_cntrs;
+       if (c) {
+               if (c->core_id == -1 || --c->refcnt == 0)
+                       kfree(c);
+               cpuc->excl_cntrs = NULL;
+               kfree(cpuc->constraint_list);
+               cpuc->constraint_list = NULL;
+       }
+}
+
+static void intel_pmu_cpu_dying(int cpu)
+{
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       struct intel_shared_regs *pc;
+
+       pc = cpuc->shared_regs;
+       if (pc) {
+               if (pc->core_id == -1 || --pc->refcnt == 0)
+                       kfree(pc);
+               cpuc->shared_regs = NULL;
+       }
+
+       free_excl_cntrs(cpu);
+
+       fini_debug_store_on_cpu(cpu);
+}
+
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+                                bool sched_in)
+{
+       if (x86_pmu.pebs_active)
+               intel_pmu_pebs_sched_task(ctx, sched_in);
+       if (x86_pmu.lbr_nr)
+               intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
+PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
+
+PMU_FORMAT_ATTR(ldlat, "config1:0-15");
+
+PMU_FORMAT_ATTR(frontend, "config1:0-23");
+
+static struct attribute *intel_arch3_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_any.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       &format_attr_in_tx.attr,
+       &format_attr_in_tx_cp.attr,
+
+       &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+       &format_attr_ldlat.attr, /* PEBS load latency */
+       NULL,
+};
+
+static struct attribute *skl_format_attr[] = {
+       &format_attr_frontend.attr,
+       NULL,
+};
+
+static __initconst const struct x86_pmu core_pmu = {
+       .name                   = "core",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = x86_pmu_disable_all,
+       .enable_all             = core_pmu_enable_all,
+       .enable                 = core_pmu_enable_event,
+       .disable                = x86_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
+
+       /*
+        * Intel PMCs cannot be accessed sanely above 32-bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL<<31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .event_constraints      = intel_core_event_constraints,
+       .guest_get_msrs         = core_guest_get_msrs,
+       .format_attrs           = intel_arch_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       /*
+        * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
+        * together with PMU version 1 and thus end up using core_pmu with
+        * shared_regs. We need the following callbacks here to allocate
+        * them properly.
+        */
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+};
+
+static __initconst const struct x86_pmu intel_pmu = {
+       .name                   = "Intel",
+       .handle_irq             = intel_pmu_handle_irq,
+       .disable_all            = intel_pmu_disable_all,
+       .enable_all             = intel_pmu_enable_all,
+       .enable                 = intel_pmu_enable_event,
+       .disable                = intel_pmu_disable_event,
+       .hw_config              = intel_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
+       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
+       .event_map              = intel_pmu_event_map,
+       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
+       .apic                   = 1,
+       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
+       /*
+        * Intel PMCs cannot be accessed sanely above 32 bit width,
+        * so we install an artificial 1<<31 period regardless of
+        * the generic event period:
+        */
+       .max_period             = (1ULL << 31) - 1,
+       .get_event_constraints  = intel_get_event_constraints,
+       .put_event_constraints  = intel_put_event_constraints,
+       .pebs_aliases           = intel_pebs_aliases_core2,
+
+       .format_attrs           = intel_arch3_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+       .cpu_prepare            = intel_pmu_cpu_prepare,
+       .cpu_starting           = intel_pmu_cpu_starting,
+       .cpu_dying              = intel_pmu_cpu_dying,
+       .guest_get_msrs         = intel_guest_get_msrs,
+       .sched_task             = intel_pmu_sched_task,
+};
+
+static __init void intel_clovertown_quirk(void)
+{
+       /*
+        * PEBS is unreliable due to:
+        *
+        *   AJ67  - PEBS may experience CPL leaks
+        *   AJ68  - PEBS PMI may be delayed by one event
+        *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
+        *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
+        *
+        * AJ67 could be worked around by restricting the OS/USR flags.
+        * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
+        *
+        * AJ106 could possibly be worked around by not allowing LBR
+        *       usage from PEBS, including the fixup.
+        * AJ68  could possibly be worked around by always programming
+        *       a pebs_event_reset[0] value and coping with the lost events.
+        *
+        * But taken together it might just make sense to not enable PEBS on
+        * these chips.
+        */
+       pr_warn("PEBS disabled due to CPU errata\n");
+       x86_pmu.pebs = 0;
+       x86_pmu.pebs_constraints = NULL;
+}
+
+static int intel_snb_pebs_broken(int cpu)
+{
+       u32 rev = UINT_MAX; /* default to broken for unknown models */
+
+       switch (cpu_data(cpu).x86_model) {
+       case 42: /* SNB */
+               rev = 0x28;
+               break;
+
+       case 45: /* SNB-EP */
+               switch (cpu_data(cpu).x86_mask) {
+               case 6: rev = 0x618; break;
+               case 7: rev = 0x70c; break;
+               }
+       }
+
+       return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+       int pebs_broken = 0;
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+                       break;
+       }
+       put_online_cpus();
+
+       if (pebs_broken == x86_pmu.pebs_broken)
+               return;
+
+       /*
+        * Serialized by the microcode lock.
+        */
+       if (x86_pmu.pebs_broken) {
+               pr_info("PEBS enabled due to microcode update\n");
+               x86_pmu.pebs_broken = 0;
+       } else {
+               pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+               x86_pmu.pebs_broken = 1;
+       }
+}
+
+/*
+ * Under certain circumstances, accessing certain MSRs may cause a #GP.
+ * This function tests whether the given MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+       u64 val_old, val_new, val_tmp;
+
+       /*
+        * Read the current value, change it and read it back to see if it
+        * matches, this is needed to detect certain hardware emulators
+        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+        */
+       if (rdmsrl_safe(msr, &val_old))
+               return false;
+
+       /*
+        * Only change the bits which can be updated by wrmsrl.
+        */
+       val_tmp = val_old ^ mask;
+       if (wrmsrl_safe(msr, val_tmp) ||
+           rdmsrl_safe(msr, &val_new))
+               return false;
+
+       if (val_new != val_tmp)
+               return false;
+
+       /*
+        * At this point the MSR is known to be safely accessible.
+        * Restore the old value and return.
+        */
+       wrmsrl(msr, val_old);
+
+       return true;
+}
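
To see why the toggle-and-read-back catches MSRs that are silently stubbed out (for instance by an emulator that accepts the write but always reads back 0), a worked trace with an assumed mask of 0x3:

    /*
     * val_old = 0x0                (stub MSR always reads 0)
     * val_tmp = 0x0 ^ 0x3 = 0x3    (the value we attempt to write)
     * val_new = 0x0                (the write was silently dropped)
     * val_new != val_tmp           -> check_msr() returns false
     */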
+
+static __init void intel_sandybridge_quirk(void)
+{
+       x86_pmu.check_microcode = intel_snb_check_microcode;
+       intel_snb_check_microcode();
+}
+
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+       { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+       { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+       { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+       { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+       { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+       { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+       { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+       int bit;
+
+       /* disable events reported as not present by cpuid */
+       for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+               pr_warn("CPUID marked event: \'%s\' unavailable\n",
+                       intel_arch_events_map[bit].name);
+       }
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+       union cpuid10_ebx ebx;
+
+       ebx.full = x86_pmu.events_maskl;
+       if (ebx.split.no_branch_misses_retired) {
+               /*
+                * Erratum AAJ80 detected, we work it around by using
+                * the BR_MISP_EXEC.ANY event. This will over-count
+                * branch-misses, but it's still much better than the
+                * architectural event which is often completely bogus:
+                */
+               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+               ebx.split.no_branch_misses_retired = 0;
+               x86_pmu.events_maskl = ebx.full;
+               pr_info("CPU erratum AAJ80 worked around\n");
+       }
+}
+
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However, detecting whether HT is
+ * enabled is difficult (model specific). So instead, we enable the
+ * workaround during early boot and verify whether it is needed in a
+ * later initcall phase, once we have valid topology information to
+ * check if HT is actually enabled.
+ */
+static __init void intel_ht_bug(void)
+{
+       x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
+
+       x86_pmu.start_scheduling = intel_start_scheduling;
+       x86_pmu.commit_scheduling = intel_commit_scheduling;
+       x86_pmu.stop_scheduling = intel_stop_scheduling;
+}
+
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
+
+static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(tx_start),
+       EVENT_PTR(tx_commit),
+       EVENT_PTR(tx_abort),
+       EVENT_PTR(tx_capacity),
+       EVENT_PTR(tx_conflict),
+       EVENT_PTR(el_start),
+       EVENT_PTR(el_commit),
+       EVENT_PTR(el_abort),
+       EVENT_PTR(el_capacity),
+       EVENT_PTR(el_conflict),
+       EVENT_PTR(cycles_t),
+       EVENT_PTR(cycles_ct),
+       EVENT_PTR(mem_ld_hsw),
+       EVENT_PTR(mem_st_hsw),
+       NULL
+};
+
+__init int intel_pmu_init(void)
+{
+       union cpuid10_edx edx;
+       union cpuid10_eax eax;
+       union cpuid10_ebx ebx;
+       struct event_constraint *c;
+       unsigned int unused;
+       struct extra_reg *er;
+       int version, i;
+
+       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+               switch (boot_cpu_data.x86) {
+               case 0x6:
+                       return p6_pmu_init();
+               case 0xb:
+                       return knc_pmu_init();
+               case 0xf:
+                       return p4_pmu_init();
+               }
+               return -ENODEV;
+       }
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Branch Misses Retired hw_event or not.
+        */
+       cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+       if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
+               return -ENODEV;
+
+       version = eax.split.version_id;
+       if (version < 2)
+               x86_pmu = core_pmu;
+       else
+               x86_pmu = intel_pmu;
+
+       x86_pmu.version                 = version;
+       x86_pmu.num_counters            = eax.split.num_counters;
+       x86_pmu.cntval_bits             = eax.split.bit_width;
+       x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
+
+       x86_pmu.events_maskl            = ebx.full;
+       x86_pmu.events_mask_len         = eax.split.mask_length;
+
+       x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
+       /*
+        * Quirk: v2 perfmon does not report fixed-purpose events, so
+        * assume at least 3 events:
+        */
+       if (version > 1)
+               x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+
+       if (boot_cpu_has(X86_FEATURE_PDCM)) {
+               u64 capabilities;
+
+               rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+               x86_pmu.intel_cap.capabilities = capabilities;
+       }
+
+       intel_ds_init();
+
+       x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
+       /*
+        * Install the hw-cache-events table:
+        */
+       switch (boot_cpu_data.x86_model) {
+       case 14: /* 65nm Core "Yonah" */
+               pr_cont("Core events, ");
+               break;
+
+       case 15: /* 65nm Core2 "Merom"          */
+               x86_add_quirk(intel_clovertown_quirk);
+       case 22: /* 65nm Core2 "Merom-L"        */
+       case 23: /* 45nm Core2 "Penryn"         */
+       case 29: /* 45nm Core2 "Dunnington" (MP)   */
+               memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+
+               intel_pmu_lbr_init_core();
+
+               x86_pmu.event_constraints = intel_core2_event_constraints;
+               x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
+               pr_cont("Core2 events, ");
+               break;
+
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+               memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_nhm();
+
+               x86_pmu.event_constraints = intel_nehalem_event_constraints;
+               x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+               x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+               x86_pmu.cpu_events = nhm_events_attrs;
+
+               /* UOPS_ISSUED.STALLED_CYCLES */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+               intel_pmu_pebs_data_source_nhm();
+               x86_add_quirk(intel_nehalem_quirk);
+
+               pr_cont("Nehalem events, ");
+               break;
+
+       case 28: /* 45nm Atom "Pineview"   */
+       case 38: /* 45nm Atom "Lincroft"   */
+       case 39: /* 32nm Atom "Penwell"    */
+       case 53: /* 32nm Atom "Cloverview" */
+       case 54: /* 32nm Atom "Cedarview"  */
+               memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+
+               intel_pmu_lbr_init_atom();
+
+               x86_pmu.event_constraints = intel_gen_event_constraints;
+               x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
+               pr_cont("Atom events, ");
+               break;
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 76: /* 14nm Atom "Airmont"                   */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+               memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+                       sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_atom();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_slm_extra_regs;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               pr_cont("Silvermont events, ");
+               break;
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_nhm();
+
+               x86_pmu.event_constraints = intel_westmere_event_constraints;
+               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+               x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_westmere_extra_regs;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+
+               x86_pmu.cpu_events = nhm_events_attrs;
+
+               /* UOPS_ISSUED.STALLED_CYCLES */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
+
+               intel_pmu_pebs_data_source_nhm();
+               pr_cont("Westmere events, ");
+               break;
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+               x86_add_quirk(intel_sandybridge_quirk);
+               x86_add_quirk(intel_ht_bug);
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_snb_event_constraints;
+               x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+               if (boot_cpu_data.x86_model == 45)
+                       x86_pmu.extra_regs = intel_snbep_extra_regs;
+               else
+                       x86_pmu.extra_regs = intel_snb_extra_regs;
+
+
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.cpu_events = snb_events_attrs;
+
+               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+               /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+                       X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
+
+               pr_cont("SandyBridge events, ");
+               break;
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+               x86_add_quirk(intel_ht_bug);
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               /* dTLB-load-misses on IVB is different from SNB */
+               hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_ivb_event_constraints;
+               x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               if (boot_cpu_data.x86_model == 62)
+                       x86_pmu.extra_regs = intel_snbep_extra_regs;
+               else
+                       x86_pmu.extra_regs = intel_snb_extra_regs;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.cpu_events = snb_events_attrs;
+
+               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+               pr_cont("IvyBridge events, ");
+               break;
+
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+               x86_add_quirk(intel_ht_bug);
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_hsw();
+
+               x86_pmu.event_constraints = intel_hsw_event_constraints;
+               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snbep_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               x86_pmu.lbr_double_abort = true;
+               pr_cont("Haswell events, ");
+               break;
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
+               hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
+                                                                        BDW_L3_MISS|HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
+                                                                         HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
+                                                                            BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+               hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
+                                                                             BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
+
+               intel_pmu_lbr_init_hsw();
+
+               x86_pmu.event_constraints = intel_bdw_event_constraints;
+               x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snbep_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               x86_pmu.limit_period = bdw_limit_period;
+               pr_cont("Broadwell events, ");
+               break;
+
+       case 87: /* Knights Landing Xeon Phi */
+               memcpy(hw_cache_event_ids,
+                      slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs,
+                      knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+               intel_pmu_lbr_init_knl();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_knl_extra_regs;
+
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               pr_cont("Knights Landing events, ");
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_skl_event_constraints;
+               x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_skl_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
+               x86_pmu.pebs_prec_dist = true;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
+                                                 skl_format_attr);
+               WARN_ON(!x86_pmu.format_attrs);
+               x86_pmu.cpu_events = hsw_events_attrs;
+               pr_cont("Skylake events, ");
+               break;
+
+       default:
+               switch (x86_pmu.version) {
+               case 1:
+                       x86_pmu.event_constraints = intel_v1_event_constraints;
+                       pr_cont("generic architected perfmon v1, ");
+                       break;
+               default:
+                       /*
+                        * default constraints for v2 and up
+                        */
+                       x86_pmu.event_constraints = intel_gen_event_constraints;
+                       pr_cont("generic architected perfmon, ");
+                       break;
+               }
+       }
+
+       if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+               WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+                    x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+               x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+       }
+       x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+       if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+               WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+                    x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+               x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+       }
+
+       x86_pmu.intel_ctrl |=
+               ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+       if (x86_pmu.event_constraints) {
+               /*
+                * event on fixed counter2 (REF_CYCLES) only works on this
+                * counter, so do not extend mask to generic counters
+                */
+               for_each_event_constraint(c, x86_pmu.event_constraints) {
+                       if (c->cmask == FIXED_EVENT_FLAGS
+                           && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+                               c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+                       }
+                       c->idxmsk64 &=
+                               ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
+                       c->weight = hweight64(c->idxmsk64);
+               }
+       }
+
+       /*
+        * Accessing LBR MSRs may cause a #GP under certain circumstances,
+        * e.g. KVM doesn't support the LBR MSRs.
+        * Check all LBR MSRs here.
+        * Disable LBR access if any LBR MSR cannot be accessed.
+        */
+       if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+               x86_pmu.lbr_nr = 0;
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+                     check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+                       x86_pmu.lbr_nr = 0;
+       }
+
+       /*
+        * Accessing extra MSRs may cause a #GP under certain circumstances,
+        * e.g. KVM doesn't support the offcore event MSRs.
+        * Check all extra_regs here.
+        */
+       if (x86_pmu.extra_regs) {
+               for (er = x86_pmu.extra_regs; er->msr; er++) {
+                       er->extra_msr_access = check_msr(er->msr, 0x11UL);
+                       /* Disable LBR select mapping */
+                       if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+                               x86_pmu.lbr_sel_map = NULL;
+               }
+       }
+
+       /* Support full width counters using alternative MSR range */
+       if (x86_pmu.intel_cap.full_width_write) {
+               x86_pmu.max_period = x86_pmu.cntval_mask;
+               x86_pmu.perfctr = MSR_IA32_PMC0;
+               pr_cont("full-width counters, ");
+       }
+
+       return 0;
+}
+
+/*
+ * HT bug: phase 2 init
+ * Called once we have valid topology information to check
+ * whether or not HT is enabled.
+ * If HT is off, then we disable the workaround.
+ */
+static __init int fixup_ht_bug(void)
+{
+       int cpu = smp_processor_id();
+       int w, c;
+       /*
+        * problem not present on this CPU model, nothing to do
+        */
+       if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
+               return 0;
+
+       w = cpumask_weight(topology_sibling_cpumask(cpu));
+       if (w > 1) {
+               pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
+               return 0;
+       }
+
+       if (lockup_detector_suspend() != 0) {
+               pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
+               return 0;
+       }
+
+       x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
+
+       x86_pmu.start_scheduling = NULL;
+       x86_pmu.commit_scheduling = NULL;
+       x86_pmu.stop_scheduling = NULL;
+
+       lockup_detector_resume();
+
+       get_online_cpus();
+
+       for_each_online_cpu(c) {
+               free_excl_cntrs(c);
+       }
+
+       put_online_cpus();
+       pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
+       return 0;
+}
+subsys_initcall(fixup_ht_bug)
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
new file mode 100644
index 0000000..93cb412
--- /dev/null
+++ b/arch/x86/events/intel/cqm.c
@@ -0,0 +1,1381 @@
+/*
+ * Intel Cache Quality-of-Service Monitoring (CQM) support.
+ *
+ * Based very, very heavily on work by Peter Zijlstra.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define MSR_IA32_PQR_ASSOC     0x0c8f
+#define MSR_IA32_QM_CTR                0x0c8e
+#define MSR_IA32_QM_EVTSEL     0x0c8d
+
+static u32 cqm_max_rmid = -1;
+static unsigned int cqm_l3_scale; /* supposedly cacheline size */
+
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:              The cached Resource Monitoring ID
+ * @closid:            The cached Class Of Service ID
+ * @rmid_usecnt:       The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
+       u32                     rmid;
+       u32                     closid;
+       int                     rmid_usecnt;
+};
+
+/*
+ * The cached intel_pqr_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+
+/*
+ * Protects cache_groups, cqm_rmid_free_lru and cqm_rmid_limbo_lru.
+ * Also protects event->hw.cqm_rmid.
+ *
+ * Hold either for stability, both for modification of ->hw.cqm_rmid.
+ */
+static DEFINE_MUTEX(cache_mutex);
+static DEFINE_RAW_SPINLOCK(cache_lock);
+
+/*
+ * Groups of events that have the same target(s), one RMID per group.
+ */
+static LIST_HEAD(cache_groups);
+
+/*
+ * Mask of CPUs for reading CQM values. We only need one per-socket.
+ */
+static cpumask_t cqm_cpumask;
+
+#define RMID_VAL_ERROR         (1ULL << 63)
+#define RMID_VAL_UNAVAIL       (1ULL << 62)
+
+#define QOS_L3_OCCUP_EVENT_ID  (1 << 0)
+
+#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
+
+/*
+ * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
+ *
+ * This rmid is always free and is guaranteed to have an associated
+ * near-zero occupancy value, i.e. no cachelines are tagged with this
+ * RMID, once __intel_cqm_rmid_rotate() returns.
+ */
+static u32 intel_cqm_rotation_rmid;
+
+#define INVALID_RMID           (-1)
+
+/*
+ * Is @rmid valid for programming the hardware?
+ *
+ * rmid 0 is reserved by the hardware for all non-monitored tasks, which
+ * means that we should never come across an rmid with that value.
+ * Likewise, an rmid value of -1 is used to indicate "no rmid currently
+ * assigned" and is used as part of the rotation code.
+ */
+static inline bool __rmid_valid(u32 rmid)
+{
+       if (!rmid || rmid == INVALID_RMID)
+               return false;
+
+       return true;
+}
+
+static u64 __rmid_read(u32 rmid)
+{
+       u64 val;
+
+       /*
+        * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
+        * it just says that to increase confusion.
+        */
+       wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
+       rdmsrl(MSR_IA32_QM_CTR, val);
+
+       /*
+        * Aside from the ERROR and UNAVAIL bits, assume this thing returns
+        * the number of cachelines tagged with @rmid.
+        */
+       return val;
+}
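
For context, a caller is expected to reject the status bits before trusting the count; a minimal sketch (the scale factor, cqm_l3_scale, is reported separately rather than applied here):

    /* Illustrative only: read and validate L3 occupancy for @rmid. */
    u64 val = __rmid_read(rmid);

    if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
            return 0;           /* RMID invalid, or data not yet available */

    return val;                 /* occupancy, in units of cqm_l3_scale bytes */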
+
+enum rmid_recycle_state {
+       RMID_YOUNG = 0,
+       RMID_AVAILABLE,
+       RMID_DIRTY,
+};
+
+struct cqm_rmid_entry {
+       u32 rmid;
+       enum rmid_recycle_state state;
+       struct list_head list;
+       unsigned long queue_time;
+};
+
+/*
+ * cqm_rmid_free_lru - A least recently used list of RMIDs.
+ *
+ * Oldest entry at the head, newest (most recently used) entry at the
+ * tail. This list is never traversed, it's only used to keep track of
+ * the lru order. That is, we only pick entries off the head or insert
+ * them on the tail.
+ *
+ * All entries on the list are 'free', and their RMIDs are not currently
+ * in use. To mark an RMID as in use, remove its entry from the lru
+ * list.
+ *
+ *
+ * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
+ *
+ * This list contains RMIDs that no one is currently using but that
+ * may have a non-zero occupancy value associated with them. The
+ * rotation worker moves RMIDs from the limbo list to the free list once
+ * the occupancy value drops below __intel_cqm_threshold.
+ *
+ * Both lists are protected by cache_mutex.
+ */
+static LIST_HEAD(cqm_rmid_free_lru);
+static LIST_HEAD(cqm_rmid_limbo_lru);
+
+/*
+ * We use a simple array of pointers so that we can lookup a struct
+ * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
+ * and __put_rmid() from having to worry about dealing with struct
+ * cqm_rmid_entry - they just deal with rmids, i.e. integers.
+ *
+ * Once this array is initialized it is read-only. No locks are required
+ * to access it.
+ *
+ * All entries for all RMIDs can be looked up in this array at all
+ * times.
+ */
+static struct cqm_rmid_entry **cqm_rmid_ptrs;
+
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
+{
+       struct cqm_rmid_entry *entry;
+
+       entry = cqm_rmid_ptrs[rmid];
+       WARN_ON(entry->rmid != rmid);
+
+       return entry;
+}
+
+/*
+ * Returns INVALID_RMID if no free RMIDs are available.
+ *
+ * We expect to be called with cache_mutex held.
+ */
+static u32 __get_rmid(void)
+{
+       struct cqm_rmid_entry *entry;
+
+       lockdep_assert_held(&cache_mutex);
+
+       if (list_empty(&cqm_rmid_free_lru))
+               return INVALID_RMID;
+
+       entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
+       list_del(&entry->list);
+
+       return entry->rmid;
+}
+
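+/*
+ * Return @rmid to the limbo list. The rotation worker moves it back to
+ * the free list once its occupancy has dropped below the threshold.
+ *
+ * We expect to be called with cache_mutex held.
+ */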
+static void __put_rmid(u32 rmid)
+{
+       struct cqm_rmid_entry *entry;
+
+       lockdep_assert_held(&cache_mutex);
+
+       WARN_ON(!__rmid_valid(rmid));
+       entry = __rmid_entry(rmid);
+
+       entry->queue_time = jiffies;
+       entry->state = RMID_YOUNG;
+
+       list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
+}
+
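+/*
+ * Allocate a cqm_rmid_entry for every RMID and place them all on the
+ * free list, then pull out the reserved RMID 0 and the initial
+ * rotation RMID.
+ */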
+static int intel_cqm_setup_rmid_cache(void)
+{
+       struct cqm_rmid_entry *entry;
+       unsigned int nr_rmids;
+       int r = 0;
+
+       nr_rmids = cqm_max_rmid + 1;
+       cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
+                               nr_rmids, GFP_KERNEL);
+       if (!cqm_rmid_ptrs)
+               return -ENOMEM;
+
+       for (; r <= cqm_max_rmid; r++) {
+               struct cqm_rmid_entry *entry;
+
+               entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+               if (!entry)
+                       goto fail;
+
+               INIT_LIST_HEAD(&entry->list);
+               entry->rmid = r;
+               cqm_rmid_ptrs[r] = entry;
+
+               list_add_tail(&entry->list, &cqm_rmid_free_lru);
+       }
+
+       /*
+        * RMID 0 is special and is always allocated. It's used for all
+        * tasks that are not monitored.
+        */
+       entry = __rmid_entry(0);
+       list_del(&entry->list);
+
+       mutex_lock(&cache_mutex);
+       intel_cqm_rotation_rmid = __get_rmid();
+       mutex_unlock(&cache_mutex);
+
+       return 0;
+fail:
+       while (r--)
+               kfree(cqm_rmid_ptrs[r]);
+
+       kfree(cqm_rmid_ptrs);
+       return -ENOMEM;
+}
+
+/*
+ * Determine if @a and @b measure the same set of tasks.
+ *
+ * If @a and @b measure the same set of tasks then we want to share a
+ * single RMID.
+ */
+static bool __match_event(struct perf_event *a, struct perf_event *b)
+{
+       /* Per-cpu and task events don't mix */
+       if ((a->attach_state & PERF_ATTACH_TASK) !=
+           (b->attach_state & PERF_ATTACH_TASK))
+               return false;
+
+#ifdef CONFIG_CGROUP_PERF
+       if (a->cgrp != b->cgrp)
+               return false;
+#endif
+
+       /* If not task event, we're machine wide */
+       if (!(b->attach_state & PERF_ATTACH_TASK))
+               return true;
+
+       /*
+        * Events that target same task are placed into the same cache group.
+        */
+       if (a->hw.target == b->hw.target)
+               return true;
+
+       /*
+        * Are we an inherited event?
+        */
+       if (b->parent == a)
+               return true;
+
+       return false;
+}
+
+#ifdef CONFIG_CGROUP_PERF
+static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
+{
+       if (event->attach_state & PERF_ATTACH_TASK)
+               return perf_cgroup_from_task(event->hw.target, event->ctx);
+
+       return event->cgrp;
+}
+#endif
+
+/*
+ * Determine if @a's tasks intersect with @b's tasks
+ *
+ * There are combinations of events that we explicitly prohibit,
+ *
+ *                PROHIBITS
+ *     system-wide    ->       cgroup and task
+ *     cgroup        ->        system-wide
+ *                           ->        task in cgroup
+ *     task          ->        system-wide
+ *                           ->        task in cgroup
+ *
+ * Call this function before allocating an RMID.
+ */
+static bool __conflict_event(struct perf_event *a, struct perf_event *b)
+{
+#ifdef CONFIG_CGROUP_PERF
+       /*
+        * We can have any number of cgroups but only one system-wide
+        * event at a time.
+        */
+       if (a->cgrp && b->cgrp) {
+               struct perf_cgroup *ac = a->cgrp;
+               struct perf_cgroup *bc = b->cgrp;
+
+               /*
+                * This condition should have been caught in
+                * __match_event() and we should be sharing an RMID.
+                */
+               WARN_ON_ONCE(ac == bc);
+
+               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                       return true;
+
+               return false;
+       }
+
+       if (a->cgrp || b->cgrp) {
+               struct perf_cgroup *ac, *bc;
+
+               /*
+                * cgroup and system-wide events are mutually exclusive
+                */
+               if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
+                   (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
+                       return true;
+
+               /*
+                * Ensure neither event is part of the other's cgroup
+                */
+               ac = event_to_cgroup(a);
+               bc = event_to_cgroup(b);
+               if (ac == bc)
+                       return true;
+
+               /*
+                * Must have cgroup and non-intersecting task events.
+                */
+               if (!ac || !bc)
+                       return false;
+
+               /*
+                * We have cgroup and task events, and the task belongs
+                * to a cgroup. Check for overlap.
+                */
+               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
+                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
+                       return true;
+
+               return false;
+       }
+#endif
+       /*
+        * If one of them is not a task, same story as above with cgroups.
+        */
+       if (!(a->attach_state & PERF_ATTACH_TASK) ||
+           !(b->attach_state & PERF_ATTACH_TASK))
+               return true;
+
+       /*
+        * Must be non-overlapping.
+        */
+       return false;
+}
+
+struct rmid_read {
+       u32 rmid;
+       atomic64_t value;
+};
+
+static void __intel_cqm_event_count(void *info);
+
+/*
+ * Exchange the RMID of a group of events.
+ */
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
+{
+       struct perf_event *event;
+       struct list_head *head = &group->hw.cqm_group_entry;
+       u32 old_rmid = group->hw.cqm_rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       /*
+        * If our RMID is being deallocated, perform a read now.
+        */
+       if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
+               struct rmid_read rr = {
+                       .value = ATOMIC64_INIT(0),
+                       .rmid = old_rmid,
+               };
+
+               on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
+                                &rr, 1);
+               local64_set(&group->count, atomic64_read(&rr.value));
+       }
+
+       raw_spin_lock_irq(&cache_lock);
+
+       group->hw.cqm_rmid = rmid;
+       list_for_each_entry(event, head, hw.cqm_group_entry)
+               event->hw.cqm_rmid = rmid;
+
+       raw_spin_unlock_irq(&cache_lock);
+
+       return old_rmid;
+}
+
+/*
+ * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
+ * cachelines are still tagged with RMIDs in limbo, we progressively
+ * increment the threshold until we find an RMID in limbo with <=
+ * __intel_cqm_threshold lines tagged. This is designed to mitigate the
+ * problem where cachelines tagged with an RMID are not steadily being
+ * evicted.
+ *
+ * On successful rotations we decrease the threshold back towards zero.
+ *
+ * __intel_cqm_max_threshold provides an upper bound on the threshold,
+ * and is measured in bytes because it's exposed to userland.
+ */
+static unsigned int __intel_cqm_threshold;
+static unsigned int __intel_cqm_max_threshold;
+
+/*
+ * Walk the limbo list on this cpu and mark RMIDs whose occupancy is
+ * still above __intel_cqm_threshold as RMID_DIRTY.
+ */
+static void intel_cqm_stable(void *arg)
+{
+       struct cqm_rmid_entry *entry;
+
+       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+               if (entry->state != RMID_AVAILABLE)
+                       break;
+
+               if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
+                       entry->state = RMID_DIRTY;
+       }
+}
+
+/*
+ * If we have group events waiting for an RMID that don't conflict with
+ * events already running, assign @rmid.
+ */
+static bool intel_cqm_sched_in_event(u32 rmid)
+{
+       struct perf_event *leader, *event;
+
+       lockdep_assert_held(&cache_mutex);
+
+       leader = list_first_entry(&cache_groups, struct perf_event,
+                                 hw.cqm_groups_entry);
+       event = leader;
+
+       list_for_each_entry_continue(event, &cache_groups,
+                                    hw.cqm_groups_entry) {
+               if (__rmid_valid(event->hw.cqm_rmid))
+                       continue;
+
+               if (__conflict_event(event, leader))
+                       continue;
+
+               intel_cqm_xchg_rmid(event, rmid);
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Initially use this constant for both the limbo queue time and the
+ * rotation timer interval, pmu::hrtimer_interval_ms.
+ *
+ * They don't need to be the same, but the two are related since if you
+ * rotate faster than you recycle RMIDs, you may run out of available
+ * RMIDs.
+ */
+#define RMID_DEFAULT_QUEUE_TIME 250    /* ms */
+
+static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
+
+/*
+ * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
+ * @available: number of freeable RMIDs on the limbo list
+ *
+ * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
+ * cachelines are tagged with those RMIDs. After this we can reuse them
+ * and know that the current set of active RMIDs is stable.
+ *
+ * Return %true or %false depending on whether stabilization needs to be
+ * reattempted.
+ *
+ * If we return %true then @available is updated to indicate the
+ * number of RMIDs on the limbo list that have been queued for the
+ * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
+ * are above __intel_cqm_threshold.
+ */
+static bool intel_cqm_rmid_stabilize(unsigned int *available)
+{
+       struct cqm_rmid_entry *entry, *tmp;
+
+       lockdep_assert_held(&cache_mutex);
+
+       *available = 0;
+       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
+               unsigned long min_queue_time;
+               unsigned long now = jiffies;
+
+               /*
+                * We hold RMIDs placed into limbo for a minimum queue
+                * time. Before the minimum queue time has elapsed we do
+                * not recycle RMIDs.
+                *
+                * The reasoning is that until a sufficient time has
+                * passed since we stopped using an RMID, any RMID
+                * placed onto the limbo list will likely still have
+                * data tagged in the cache, which means we'll probably
+                * fail to recycle it anyway.
+                *
+                * We can save ourselves an expensive IPI by skipping
+                * any RMIDs that have not been queued for the minimum
+                * time.
+                */
+               min_queue_time = entry->queue_time +
+                       msecs_to_jiffies(__rmid_queue_time_ms);
+
+               if (time_after(min_queue_time, now))
+                       break;
+
+               entry->state = RMID_AVAILABLE;
+               (*available)++;
+       }
+
+       /*
+        * Fast return if none of the RMIDs on the limbo list have been
+        * sitting on the queue for the minimum queue time.
+        */
+       if (!*available)
+               return false;
+
+       /*
+        * Test whether an RMID is free for each package.
+        */
+       on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
+
+       list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
+               /*
+                * Exhausted all RMIDs that have waited min queue time.
+                */
+               if (entry->state == RMID_YOUNG)
+                       break;
+
+               if (entry->state == RMID_DIRTY)
+                       continue;
+
+               list_del(&entry->list); /* remove from limbo */
+
+               /*
+                * The rotation RMID gets priority if it's
+                * currently invalid, in which case we skip adding
+                * the RMID to the free lru.
+                */
+               if (!__rmid_valid(intel_cqm_rotation_rmid)) {
+                       intel_cqm_rotation_rmid = entry->rmid;
+                       continue;
+               }
+
+               /*
+                * If we have groups waiting for RMIDs, hand
+                * them one now provided they don't conflict.
+                */
+               if (intel_cqm_sched_in_event(entry->rmid))
+                       continue;
+
+               /*
+                * Otherwise place it onto the free list.
+                */
+               list_add_tail(&entry->list, &cqm_rmid_free_lru);
+       }
+
+
+       return __rmid_valid(intel_cqm_rotation_rmid);
+}
+
+/*
+ * Pick a victim group and move it to the tail of the group list.
+ * @next: The first group without an RMID
+ */
+static void __intel_cqm_pick_and_rotate(struct perf_event *next)
+{
+       struct perf_event *rotor;
+       u32 rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       rotor = list_first_entry(&cache_groups, struct perf_event,
+                                hw.cqm_groups_entry);
+
+       /*
+        * The group at the front of the list should always have a valid
+        * RMID. If it doesn't then no groups have RMIDs assigned and we
+        * don't need to rotate the list.
+        */
+       if (next == rotor)
+               return;
+
+       rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
+       __put_rmid(rmid);
+
+       list_rotate_left(&cache_groups);
+}
+
+/*
+ * Deallocate the RMIDs from any events that conflict with @event, and
+ * place them on the back of the group list.
+ */
+static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
+{
+       struct perf_event *group, *g;
+       u32 rmid;
+
+       lockdep_assert_held(&cache_mutex);
+
+       list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
+               if (group == event)
+                       continue;
+
+               rmid = group->hw.cqm_rmid;
+
+               /*
+                * Skip events that don't have a valid RMID.
+                */
+               if (!__rmid_valid(rmid))
+                       continue;
+
+               /*
+                * No conflict? No problem! Leave the event alone.
+                */
+               if (!__conflict_event(group, event))
+                       continue;
+
+               intel_cqm_xchg_rmid(group, INVALID_RMID);
+               __put_rmid(rmid);
+       }
+}
+
+/*
+ * Attempt to rotate the groups and assign new RMIDs.
+ *
+ * We rotate for two reasons,
+ *   1. To handle the scheduling of conflicting events
+ *   2. To recycle RMIDs
+ *
+ * Rotating RMIDs is complicated because the hardware doesn't give us
+ * any clues.
+ *
+ * There are problems with the hardware interface; when you change the
+ * task:RMID map cachelines retain their 'old' tags, giving a skewed
+ * picture. In order to work around this, we must always keep one free
+ * RMID - intel_cqm_rotation_rmid.
+ *
+ * Rotation works by taking away an RMID from a group (the old RMID),
+ * and assigning the free RMID to another group (the new RMID). We must
+ * then wait for the old RMID to not be used (no cachelines tagged).
+ * This ensures that all cachelines are tagged with 'active' RMIDs. At
+ * this point we can start reading values for the new RMID and treat the
+ * old RMID as the free RMID for the next rotation.
+ *
+ * Return %true or %false depending on whether we did any rotating.
+ */
+static bool __intel_cqm_rmid_rotate(void)
+{
+       struct perf_event *group, *start = NULL;
+       unsigned int threshold_limit;
+       unsigned int nr_needed = 0;
+       unsigned int nr_available;
+       bool rotated = false;
+
+       mutex_lock(&cache_mutex);
+
+again:
+       /*
+        * Fast path through this function if there are no groups and no
+        * RMIDs that need cleaning.
+        */
+       if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
+               goto out;
+
+       list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
+               if (!__rmid_valid(group->hw.cqm_rmid)) {
+                       if (!start)
+                               start = group;
+                       nr_needed++;
+               }
+       }
+
+       /*
+        * We have some event groups, but they all have RMIDs assigned
+        * and no RMIDs need cleaning.
+        */
+       if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
+               goto out;
+
+       if (!nr_needed)
+               goto stabilize;
+
+       /*
+        * We have more event groups without RMIDs than available RMIDs,
+        * or we have event groups that conflict with the ones currently
+        * scheduled.
+        *
+        * We force deallocate the rmid of the group at the head of
+        * cache_groups. The first event group without an RMID then gets
+        * assigned intel_cqm_rotation_rmid. This ensures we always make
+        * forward progress.
+        *
+        * Rotate the cache_groups list so the previous head is now the
+        * tail.
+        */
+       __intel_cqm_pick_and_rotate(start);
+
+       /*
+        * If the rotation is going to succeed, reduce the threshold so
+        * that we don't needlessly reuse dirty RMIDs.
+        */
+       if (__rmid_valid(intel_cqm_rotation_rmid)) {
+               intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
+               intel_cqm_rotation_rmid = __get_rmid();
+
+               intel_cqm_sched_out_conflicting_events(start);
+
+               if (__intel_cqm_threshold)
+                       __intel_cqm_threshold--;
+       }
+
+       rotated = true;
+
+stabilize:
+       /*
+        * We now need to stabilize the RMID we freed above (if any) to
+        * ensure that the next time we rotate we have an RMID with zero
+        * occupancy value.
+        *
+        * Alternatively, if we didn't need to perform any rotation,
+        * we'll have a bunch of RMIDs in limbo that need stabilizing.
+        */
+       threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
+
+       while (intel_cqm_rmid_stabilize(&nr_available) &&
+              __intel_cqm_threshold < threshold_limit) {
+               unsigned int steal_limit;
+
+               /*
+                * Don't spin if nobody is actively waiting for an RMID,
+                * the rotation worker will be kicked as soon as an
+                * event needs an RMID anyway.
+                */
+               if (!nr_needed)
+                       break;
+
+               /* Allow max 25% of RMIDs to be in limbo. */
+               steal_limit = (cqm_max_rmid + 1) / 4;
+
+               /*
+                * We failed to stabilize any RMIDs so our rotation
+                * logic is now stuck. In order to make forward progress
+                * we have a few options:
+                *
+                *   1. rotate ("steal") another RMID
+                *   2. increase the threshold
+                *   3. do nothing
+                *
+                * We do both of 1. and 2. until we hit the steal limit.
+                *
+                * The steal limit prevents all RMIDs ending up on the
+                * limbo list. This can happen if every RMID has a
+                * non-zero occupancy above threshold_limit, and the
+                * occupancy values aren't dropping fast enough.
+                *
+                * Note that there is prioritisation at work here - we'd
+                * rather increase the number of RMIDs on the limbo list
+                * than increase the threshold, because increasing the
+                * threshold skews the event data (because we reuse
+                * dirty RMIDs) - threshold bumps are a last resort.
+                */
+               if (nr_available < steal_limit)
+                       goto again;
+
+               __intel_cqm_threshold++;
+       }
+
+out:
+       mutex_unlock(&cache_mutex);
+       return rotated;
+}
+
+static void intel_cqm_rmid_rotate(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
+
+static struct pmu intel_cqm_pmu;
+
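+/*
+ * Rotation worker: run one rotation pass and re-arm ourselves using the
+ * PMU's hrtimer_interval_ms as the delay.
+ */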
+static void intel_cqm_rmid_rotate(struct work_struct *work)
+{
+       unsigned long delay;
+
+       __intel_cqm_rmid_rotate();
+
+       delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
+       schedule_delayed_work(&intel_cqm_rmid_work, delay);
+}
+
+/*
+ * Find a group and set up the RMID.
+ *
+ * If we're part of a group, we use the group's RMID.
+ */
+static void intel_cqm_setup_event(struct perf_event *event,
+                                 struct perf_event **group)
+{
+       struct perf_event *iter;
+       bool conflict = false;
+       u32 rmid;
+
+       list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+               rmid = iter->hw.cqm_rmid;
+
+               if (__match_event(iter, event)) {
+                       /* All tasks in a group share an RMID */
+                       event->hw.cqm_rmid = rmid;
+                       *group = iter;
+                       return;
+               }
+
+               /*
+                * We only care about conflicts for events that are
+                * actually scheduled in (and hence have a valid RMID).
+                */
+               if (__conflict_event(iter, event) && __rmid_valid(rmid))
+                       conflict = true;
+       }
+
+       if (conflict)
+               rmid = INVALID_RMID;
+       else
+               rmid = __get_rmid();
+
+       event->hw.cqm_rmid = rmid;
+}
+
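+/*
+ * Update event->count for a CPU (system-wide) event from the occupancy
+ * counter on the local socket.
+ */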
+static void intel_cqm_event_read(struct perf_event *event)
+{
+       unsigned long flags;
+       u32 rmid;
+       u64 val;
+
+       /*
+        * Task events are handled by intel_cqm_event_count().
+        */
+       if (event->cpu == -1)
+               return;
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+       rmid = event->hw.cqm_rmid;
+
+       if (!__rmid_valid(rmid))
+               goto out;
+
+       val = __rmid_read(rmid);
+
+       /*
+        * Ignore this reading on error states and do not update the value.
+        */
+       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+               goto out;
+
+       local64_set(&event->count, val);
+out:
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+}
+
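+/*
+ * IPI callback: read the occupancy for rr->rmid on this CPU and
+ * accumulate it into rr->value.
+ */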
+static void __intel_cqm_event_count(void *info)
+{
+       struct rmid_read *rr = info;
+       u64 val;
+
+       val = __rmid_read(rr->rmid);
+
+       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+               return;
+
+       atomic64_add(val, &rr->value);
+}
+
+static inline bool cqm_group_leader(struct perf_event *event)
+{
+       return !list_empty(&event->hw.cqm_groups_entry);
+}
+
+static u64 intel_cqm_event_count(struct perf_event *event)
+{
+       unsigned long flags;
+       struct rmid_read rr = {
+               .value = ATOMIC64_INIT(0),
+       };
+
+       /*
+        * We only need to worry about task events. System-wide events
+        * are handled as usual, i.e. entirely with
+        * intel_cqm_event_read().
+        */
+       if (event->cpu != -1)
+               return __perf_event_count(event);
+
+       /*
+        * Only the group leader gets to report values. This stops us
+        * reporting duplicate values to userspace, and gives us a clear
+        * rule for which task gets to report the values.
+        *
+        * Note that it is impossible to attribute these values to
+        * specific packages - we forfeit that ability when we create
+        * task events.
+        */
+       if (!cqm_group_leader(event))
+               return 0;
+
+       /*
+        * Getting up-to-date values requires an SMP IPI which is not
+        * possible if we're being called in interrupt context. Return
+        * the cached values instead.
+        */
+       if (unlikely(in_interrupt()))
+               goto out;
+
+       /*
+        * Notice that we don't perform the reading of an RMID
+        * atomically, because we can't hold a spin lock across the
+        * IPIs.
+        *
+        * Speculatively perform the read, since @event might be
+        * assigned a different (possibly invalid) RMID while we're
+        * busy performing the IPI calls. It's therefore necessary to
+        * check @event's RMID afterwards, and if it has changed,
+        * discard the result of the read.
+        */
+       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
+
+       if (!__rmid_valid(rr.rmid))
+               goto out;
+
+       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+       if (event->hw.cqm_rmid == rr.rmid)
+               local64_set(&event->count, atomic64_read(&rr.value));
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+out:
+       return __perf_event_count(event);
+}
+
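+/*
+ * Program MSR_IA32_PQR_ASSOC with the event's RMID. Runs with
+ * interrupts disabled, which protects the per-CPU pqr_state (see the
+ * comment above its definition).
+ */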
+static void intel_cqm_event_start(struct perf_event *event, int mode)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+       u32 rmid = event->hw.cqm_rmid;
+
+       if (!(event->hw.cqm_state & PERF_HES_STOPPED))
+               return;
+
+       event->hw.cqm_state &= ~PERF_HES_STOPPED;
+
+       if (state->rmid_usecnt++) {
+               if (!WARN_ON_ONCE(state->rmid != rmid))
+                       return;
+       } else {
+               WARN_ON_ONCE(state->rmid);
+       }
+
+       state->rmid = rmid;
+       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
+}
+
+static void intel_cqm_event_stop(struct perf_event *event, int mode)
+{
+       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+       if (event->hw.cqm_state & PERF_HES_STOPPED)
+               return;
+
+       event->hw.cqm_state |= PERF_HES_STOPPED;
+
+       intel_cqm_event_read(event);
+
+       if (!--state->rmid_usecnt) {
+               state->rmid = 0;
+               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
+       } else {
+               WARN_ON_ONCE(!state->rmid);
+       }
+}
+
+static int intel_cqm_event_add(struct perf_event *event, int mode)
+{
+       unsigned long flags;
+       u32 rmid;
+
+       raw_spin_lock_irqsave(&cache_lock, flags);
+
+       event->hw.cqm_state = PERF_HES_STOPPED;
+       rmid = event->hw.cqm_rmid;
+
+       if (__rmid_valid(rmid) && (mode & PERF_EF_START))
+               intel_cqm_event_start(event, mode);
+
+       raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+       return 0;
+}
+
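+/*
+ * Remove the event from its group. If it was the group leader, either
+ * promote another event to leader or tear down the group and return
+ * its RMID.
+ */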
+static void intel_cqm_event_destroy(struct perf_event *event)
+{
+       struct perf_event *group_other = NULL;
+
+       mutex_lock(&cache_mutex);
+
+       /*
+        * If there's another event in this group...
+        */
+       if (!list_empty(&event->hw.cqm_group_entry)) {
+               group_other = list_first_entry(&event->hw.cqm_group_entry,
+                                              struct perf_event,
+                                              hw.cqm_group_entry);
+               list_del(&event->hw.cqm_group_entry);
+       }
+
+       /*
+        * And if we're the group leader...
+        */
+       if (cqm_group_leader(event)) {
+               /*
+                * If there was a group_other, make that leader, otherwise
+                * destroy the group and return the RMID.
+                */
+               if (group_other) {
+                       list_replace(&event->hw.cqm_groups_entry,
+                                    &group_other->hw.cqm_groups_entry);
+               } else {
+                       u32 rmid = event->hw.cqm_rmid;
+
+                       if (__rmid_valid(rmid))
+                               __put_rmid(rmid);
+                       list_del(&event->hw.cqm_groups_entry);
+               }
+       }
+
+       mutex_unlock(&cache_mutex);
+}
+
+static int intel_cqm_event_init(struct perf_event *event)
+{
+       struct perf_event *group = NULL;
+       bool rotate = false;
+
+       if (event->attr.type != intel_cqm_pmu.type)
+               return -ENOENT;
+
+       if (event->attr.config & ~QOS_EVENT_MASK)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       INIT_LIST_HEAD(&event->hw.cqm_group_entry);
+       INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
+
+       event->destroy = intel_cqm_event_destroy;
+
+       mutex_lock(&cache_mutex);
+
+       /* Will also set rmid */
+       intel_cqm_setup_event(event, &group);
+
+       if (group) {
+               list_add_tail(&event->hw.cqm_group_entry,
+                             &group->hw.cqm_group_entry);
+       } else {
+               list_add_tail(&event->hw.cqm_groups_entry,
+                             &cache_groups);
+
+               /*
+                * All RMIDs are either in use or have recently been
+                * used. Kick the rotation worker to clean/free some.
+                *
+                * We only do this for the group leader, rather than for
+                * every event in a group to save on needless work.
+                */
+               if (!__rmid_valid(event->hw.cqm_rmid))
+                       rotate = true;
+       }
+
+       mutex_unlock(&cache_mutex);
+
+       if (rotate)
+               schedule_delayed_work(&intel_cqm_rmid_work, 0);
+
+       return 0;
+}
+
+EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
+EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
+EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
+EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
+EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
+
+static struct attribute *intel_cqm_events_attr[] = {
+       EVENT_PTR(intel_cqm_llc),
+       EVENT_PTR(intel_cqm_llc_pkg),
+       EVENT_PTR(intel_cqm_llc_unit),
+       EVENT_PTR(intel_cqm_llc_scale),
+       EVENT_PTR(intel_cqm_llc_snapshot),
+       NULL,
+};
+
+static struct attribute_group intel_cqm_events_group = {
+       .name = "events",
+       .attrs = intel_cqm_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+static struct attribute *intel_cqm_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group intel_cqm_format_group = {
+       .name = "format",
+       .attrs = intel_cqm_formats_attr,
+};
+
+static ssize_t
+max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
+                          char *page)
+{
+       ssize_t rv;
+
+       mutex_lock(&cache_mutex);
+       rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
+       mutex_unlock(&cache_mutex);
+
+       return rv;
+}
+
+static ssize_t
+max_recycle_threshold_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       unsigned int bytes, cachelines;
+       int ret;
+
+       ret = kstrtouint(buf, 0, &bytes);
+       if (ret)
+               return ret;
+
+       mutex_lock(&cache_mutex);
+
+       __intel_cqm_max_threshold = bytes;
+       cachelines = bytes / cqm_l3_scale;
+
+       /*
+        * The new maximum takes effect immediately.
+        */
+       if (__intel_cqm_threshold > cachelines)
+               __intel_cqm_threshold = cachelines;
+
+       mutex_unlock(&cache_mutex);
+
+       return count;
+}
+
+static DEVICE_ATTR_RW(max_recycle_threshold);
+
+static struct attribute *intel_cqm_attrs[] = {
+       &dev_attr_max_recycle_threshold.attr,
+       NULL,
+};
+
+static const struct attribute_group intel_cqm_group = {
+       .attrs = intel_cqm_attrs,
+};
+
+static const struct attribute_group *intel_cqm_attr_groups[] = {
+       &intel_cqm_events_group,
+       &intel_cqm_format_group,
+       &intel_cqm_group,
+       NULL,
+};
+
+static struct pmu intel_cqm_pmu = {
+       .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
+       .attr_groups         = intel_cqm_attr_groups,
+       .task_ctx_nr         = perf_sw_context,
+       .event_init          = intel_cqm_event_init,
+       .add                 = intel_cqm_event_add,
+       .del                 = intel_cqm_event_stop,
+       .start               = intel_cqm_event_start,
+       .stop                = intel_cqm_event_stop,
+       .read                = intel_cqm_event_read,
+       .count               = intel_cqm_event_count,
+};
+
+static inline void cqm_pick_event_reader(int cpu)
+{
+       int reader;
+
+       /* First online cpu in package becomes the reader */
+       reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
+       if (reader >= nr_cpu_ids)
+               cpumask_set_cpu(cpu, &cqm_cpumask);
+}
+
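+/*
+ * Reset the per-CPU PQR state and sanity check that this CPU's CQM
+ * parameters match the boot CPU's.
+ */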
+static void intel_cqm_cpu_starting(unsigned int cpu)
+{
+       struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+       state->rmid = 0;
+       state->closid = 0;
+       state->rmid_usecnt = 0;
+
+       WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
+       WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
+}
+
+static void intel_cqm_cpu_exit(unsigned int cpu)
+{
+       int target;
+
+       /* Is @cpu the current cqm reader for this package? */
+       if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
+               return;
+
+       /* Find another online reader in this package */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       if (target < nr_cpu_ids)
+               cpumask_set_cpu(target, &cqm_cpumask);
+}
+
+static int intel_cqm_cpu_notifier(struct notifier_block *nb,
+                                 unsigned long action, void *hcpu)
+{
+       unsigned int cpu  = (unsigned long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               intel_cqm_cpu_exit(cpu);
+               break;
+       case CPU_STARTING:
+               intel_cqm_cpu_starting(cpu);
+               cqm_pick_event_reader(cpu);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static const struct x86_cpu_id intel_cqm_match[] = {
+       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
+       {}
+};
+
+static int __init intel_cqm_init(void)
+{
+       char *str, scale[20];
+       int i, cpu, ret;
+
+       if (!x86_match_cpu(intel_cqm_match))
+               return -ENODEV;
+
+       cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
+
+       /*
+        * It's possible that not all resources support the same number
+        * of RMIDs. Instead of making scheduling much more complicated
+        * (where we have to match a task's RMID to a cpu that supports
+        * that many RMIDs) just find the minimum RMIDs supported across
+        * all cpus.
+        *
+        * Also, check that the scales match on all cpus.
+        */
+       cpu_notifier_register_begin();
+
+       for_each_online_cpu(cpu) {
+               struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+               if (c->x86_cache_max_rmid < cqm_max_rmid)
+                       cqm_max_rmid = c->x86_cache_max_rmid;
+
+               if (c->x86_cache_occ_scale != cqm_l3_scale) {
+                       pr_err("Multiple LLC scale values, disabling\n");
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+
+       /*
+        * A reasonable upper limit on the max threshold is the number
+        * of lines tagged per RMID if all RMIDs have the same number of
+        * lines tagged in the LLC.
+        *
+        * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
+        */
+       __intel_cqm_max_threshold =
+               boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
+
+       snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
+       str = kstrdup(scale, GFP_KERNEL);
+       if (!str) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       event_attr_intel_cqm_llc_scale.event_str = str;
+
+       ret = intel_cqm_setup_rmid_cache();
+       if (ret)
+               goto out;
+
+       for_each_online_cpu(i) {
+               intel_cqm_cpu_starting(i);
+               cqm_pick_event_reader(i);
+       }
+
+       __perf_cpu_notifier(intel_cqm_cpu_notifier);
+
+       ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
+       if (ret)
+               pr_err("Intel CQM perf registration failed: %d\n", ret);
+       else
+               pr_info("Intel CQM monitoring enabled\n");
+
+out:
+       cpu_notifier_register_done();
+
+       return ret;
+}
+device_initcall(intel_cqm_init);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
new file mode 100644 (file)
index 0000000..7946c42
--- /dev/null
@@ -0,0 +1,694 @@
+/*
+ * perf_event_intel_cstate.c: support cstate residency counters
+ *
+ * Copyright (C) 2015, Intel Corp.
+ * Author: Kan Liang (kan.liang@intel.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ */
+
+/*
+ * This file exports cstate-related free-running (read-only) counters
+ * for perf. These counters may be used simultaneously by other tools,
+ * such as turbostat. However, it still makes sense to implement them
+ * in perf, because we can conveniently collect them together with
+ * other events and allow tools to use them without special MSR
+ * access code.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it is not supported by the hardware.
+ *
+ * According to counters' scope and category, two PMUs are registered
+ * with the perf_event core subsystem.
+ *  - 'cstate_core': The counter is available for each physical core.
+ *    The counters include CORE_C*_RESIDENCY.
+ *  - 'cstate_pkg': The counter is available for each physical package.
+ *    The counters include PKG_C*_RESIDENCY.
+ *
+ * All of these counters are specified in the Intel® 64 and IA-32
+ * Architectures Software Developer's Manual, Vol. 3B.
+ *
+ * Model specific counters:
+ *     MSR_CORE_C1_RES: CORE C1 Residency Counter
+ *                      perf code: 0x00
+ *                      Available model: SLM,AMT
+ *                      Scope: Core (each processor core has a MSR)
+ *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
+ *                            perf code: 0x01
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
+ *                            perf code: 0x02
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
+ *                            perf code: 0x03
+ *                            Available model: SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Core
+ *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
+ *                            perf code: 0x00
+ *                            Available model: SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
+ *                            perf code: 0x01
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
+ *                            perf code: 0x02
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
+ *                            perf code: 0x03
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
+ *                            perf code: 0x04
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
+ *                            perf code: 0x05
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
+ *                            perf code: 0x06
+ *                            Available model: HSW ULT only
+ *                            Scope: Package (physical package)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
+static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
+                               struct kobj_attribute *attr,    \
+                               char *page)                     \
+{                                                              \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
+       return sprintf(page, _format "\n");                     \
+}                                                              \
+static struct kobj_attribute format_attr_##_var =              \
+       __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf);
+
+struct perf_cstate_msr {
+       u64     msr;
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
+};
+
+
+/* cstate_core PMU */
+
+static struct pmu cstate_core_pmu;
+static bool has_cstate_core;
+
+enum perf_cstate_core_id {
+       /*
+        * cstate_core events
+        */
+       PERF_CSTATE_CORE_C1_RES = 0,
+       PERF_CSTATE_CORE_C3_RES,
+       PERF_CSTATE_CORE_C6_RES,
+       PERF_CSTATE_CORE_C7_RES,
+
+       PERF_CSTATE_CORE_EVENT_MAX,
+};
+
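+/*
+ * Return true if the core C-state residency counter @idx is implemented
+ * on the boot CPU model.
+ */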
+static bool test_core(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               if (idx == PERF_CSTATE_CORE_C3_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES)
+                       return true;
+               break;
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_CSTATE_CORE_C3_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES ||
+                   idx == PERF_CSTATE_CORE_C7_RES)
+                       return true;
+               break;
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_CSTATE_CORE_C1_RES ||
+                   idx == PERF_CSTATE_CORE_C6_RES)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
+
+static struct perf_cstate_msr core_msr[] = {
+       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
+       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
+       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
+       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
+};
+
+static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group core_events_attr_group = {
+       .name = "events",
+       .attrs = core_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
+static struct attribute *core_format_attrs[] = {
+       &format_attr_core_event.attr,
+       NULL,
+};
+
+static struct attribute_group core_format_attr_group = {
+       .name = "format",
+       .attrs = core_format_attrs,
+};
+
+static cpumask_t cstate_core_cpu_mask;
+static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
+
+static struct attribute *cstate_cpumask_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+       .attrs = cstate_cpumask_attrs,
+};
+
+static const struct attribute_group *core_attr_groups[] = {
+       &core_events_attr_group,
+       &core_format_attr_group,
+       &cpumask_attr_group,
+       NULL,
+};
+
+/* cstate_core PMU end */
+
+
+/* cstate_pkg PMU */
+
+static struct pmu cstate_pkg_pmu;
+static bool has_cstate_pkg;
+
+enum perf_cstate_pkg_id {
+       /*
+        * cstate_pkg events
+        */
+       PERF_CSTATE_PKG_C2_RES = 0,
+       PERF_CSTATE_PKG_C3_RES,
+       PERF_CSTATE_PKG_C6_RES,
+       PERF_CSTATE_PKG_C7_RES,
+       PERF_CSTATE_PKG_C8_RES,
+       PERF_CSTATE_PKG_C9_RES,
+       PERF_CSTATE_PKG_C10_RES,
+
+       PERF_CSTATE_PKG_EVENT_MAX,
+};
+
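+/*
+ * Return true if the package C-state residency counter @idx is
+ * implemented on the boot CPU model.
+ */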
+static bool test_pkg(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+               if (idx == PERF_CSTATE_PKG_C3_RES ||
+                   idx == PERF_CSTATE_PKG_C6_RES ||
+                   idx == PERF_CSTATE_PKG_C7_RES)
+                       return true;
+               break;
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_CSTATE_PKG_C2_RES ||
+                   idx == PERF_CSTATE_PKG_C3_RES ||
+                   idx == PERF_CSTATE_PKG_C6_RES ||
+                   idx == PERF_CSTATE_PKG_C7_RES)
+                       return true;
+               break;
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_CSTATE_PKG_C6_RES)
+                       return true;
+               break;
+       case 69: /* 22nm Haswell ULT */
+               if (idx == PERF_CSTATE_PKG_C2_RES ||
+                   idx == PERF_CSTATE_PKG_C3_RES ||
+                   idx == PERF_CSTATE_PKG_C6_RES ||
+                   idx == PERF_CSTATE_PKG_C7_RES ||
+                   idx == PERF_CSTATE_PKG_C8_RES ||
+                   idx == PERF_CSTATE_PKG_C9_RES ||
+                   idx == PERF_CSTATE_PKG_C10_RES)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
+PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
+PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
+PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
+PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
+PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
+PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
+
+static struct perf_cstate_msr pkg_msr[] = {
+       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
+       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
+       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
+       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
+       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
+       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
+       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
+};
+
+static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group pkg_events_attr_group = {
+       .name = "events",
+       .attrs = pkg_events_attrs,
+};
+
+DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
+static struct attribute *pkg_format_attrs[] = {
+       &format_attr_pkg_event.attr,
+       NULL,
+};
+static struct attribute_group pkg_format_attr_group = {
+       .name = "format",
+       .attrs = pkg_format_attrs,
+};
+
+static cpumask_t cstate_pkg_cpu_mask;
+
+static const struct attribute_group *pkg_attr_groups[] = {
+       &pkg_events_attr_group,
+       &pkg_format_attr_group,
+       &cpumask_attr_group,
+       NULL,
+};
+
+/* cstate_pkg PMU end */
+
+static ssize_t cstate_get_attr_cpumask(struct device *dev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       if (pmu == &cstate_core_pmu)
+               return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
+       else if (pmu == &cstate_pkg_pmu)
+               return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
+       else
+               return 0;
+}
+
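+/*
+ * Validate the event attributes and map the requested counter to its
+ * MSR via event->hw.event_base.
+ */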
+static int cstate_pmu_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config;
+       int ret = 0;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       if (event->pmu == &cstate_core_pmu) {
+               if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
+                       return -EINVAL;
+               if (!core_msr[cfg].attr)
+                       return -EINVAL;
+               event->hw.event_base = core_msr[cfg].msr;
+       } else if (event->pmu == &cstate_pkg_pmu) {
+               if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
+                       return -EINVAL;
+               if (!pkg_msr[cfg].attr)
+                       return -EINVAL;
+               event->hw.event_base = pkg_msr[cfg].msr;
+       } else
+               return -ENOENT;
+
+       /* must be done before validate_group */
+       event->hw.config = cfg;
+       event->hw.idx = -1;
+
+       return ret;
+}
+
+static inline u64 cstate_pmu_read_counter(struct perf_event *event)
+{
+       u64 val;
+
+       rdmsrl(event->hw.event_base, val);
+       return val;
+}
+
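+/*
+ * The counters are free running, so accumulate the delta since the
+ * previous read with the usual prev_count cmpxchg loop.
+ */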
+static void cstate_pmu_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev_raw_count, new_raw_count;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = cstate_pmu_read_counter(event);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                           new_raw_count) != prev_raw_count)
+               goto again;
+
+       local64_add(new_raw_count - prev_raw_count, &event->count);
+}
+
+static void cstate_pmu_event_start(struct perf_event *event, int mode)
+{
+       local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
+}
+
+static void cstate_pmu_event_stop(struct perf_event *event, int mode)
+{
+       cstate_pmu_event_update(event);
+}
+
+static void cstate_pmu_event_del(struct perf_event *event, int mode)
+{
+       cstate_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int cstate_pmu_event_add(struct perf_event *event, int mode)
+{
+       if (mode & PERF_EF_START)
+               cstate_pmu_event_start(event, mode);
+
+       return 0;
+}
+
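+/*
+ * When the CPU that currently reads the counters for its core/package
+ * goes offline, hand that role (and the perf context) over to another
+ * online CPU in the same domain.
+ */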
+static void cstate_cpu_exit(int cpu)
+{
+       int i, id, target;
+
+       /* cpu exit for cstate core */
+       if (has_cstate_core) {
+               id = topology_core_id(cpu);
+               target = -1;
+
+               for_each_online_cpu(i) {
+                       if (i == cpu)
+                               continue;
+                       if (id == topology_core_id(i)) {
+                               target = i;
+                               break;
+                       }
+               }
+               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+                       cpumask_set_cpu(target, &cstate_core_cpu_mask);
+               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
+               if (target >= 0)
+                       perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+       }
+
+       /* cpu exit for cstate pkg */
+       if (has_cstate_pkg) {
+               id = topology_physical_package_id(cpu);
+               target = -1;
+
+               for_each_online_cpu(i) {
+                       if (i == cpu)
+                               continue;
+                       if (id == topology_physical_package_id(i)) {
+                               target = i;
+                               break;
+                       }
+               }
+               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+                       cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
+               if (target >= 0)
+                       perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+       }
+}
+
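+/*
+ * The first CPU to come up in each core/package becomes the designated
+ * reader of that core's/package's residency counters.
+ */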
+static void cstate_cpu_init(int cpu)
+{
+       int i, id;
+
+       /* cpu init for cstate core */
+       if (has_cstate_core) {
+               id = topology_core_id(cpu);
+               for_each_cpu(i, &cstate_core_cpu_mask) {
+                       if (id == topology_core_id(i))
+                               break;
+               }
+               if (i >= nr_cpu_ids)
+                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+       }
+
+       /* cpu init for cstate pkg */
+       if (has_cstate_pkg) {
+               id = topology_physical_package_id(cpu);
+               for_each_cpu(i, &cstate_pkg_cpu_mask) {
+                       if (id == topology_physical_package_id(i))
+                               break;
+               }
+               if (i >= nr_cpu_ids)
+                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
+       }
+}
+
+static int cstate_cpu_notifier(struct notifier_block *self,
+                                 unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               break;
+       case CPU_STARTING:
+               cstate_cpu_init(cpu);
+               break;
+       case CPU_UP_CANCELED:
+       case CPU_DYING:
+               break;
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               break;
+       case CPU_DOWN_PREPARE:
+               cstate_cpu_exit(cpu);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Probe the cstate events and insert the available ones into the sysfs attrs.
+ * Return false if there are no available events.
+ */
+static bool cstate_probe_msr(struct perf_cstate_msr *msr,
+                            struct attribute   **events_attrs,
+                            int max_event_nr)
+{
+       int i, j = 0;
+       u64 val;
+
+       /* Probe the cstate events. */
+       for (i = 0; i < max_event_nr; i++) {
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
+       }
+
+       /* List remaining events in the sysfs attrs. */
+       for (i = 0; i < max_event_nr; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
+       }
+       events_attrs[j] = NULL;
+
+       return (j > 0) ? true : false;
+}
+
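+/*
+ * Probe which core and package residency MSRs are usable on this CPU and
+ * record whether either PMU ends up with at least one event to offer.
+ */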
+static int __init cstate_init(void)
+{
+       /* These SLM models report PKG C6 residency via the PKG C7 MSR */
+       switch (boot_cpu_data.x86_model) {
+       case 55:
+       case 76:
+       case 77:
+               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+       }
+
+       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
+               has_cstate_core = true;
+
+       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
+               has_cstate_pkg = true;
+
+       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static void __init cstate_cpumask_init(void)
+{
+       int cpu;
+
+       cpu_notifier_register_begin();
+
+       for_each_online_cpu(cpu)
+               cstate_cpu_init(cpu);
+
+       __perf_cpu_notifier(cstate_cpu_notifier);
+
+       cpu_notifier_register_done();
+}
+
+static struct pmu cstate_core_pmu = {
+       .attr_groups    = core_attr_groups,
+       .name           = "cstate_core",
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = cstate_pmu_event_init,
+       .add            = cstate_pmu_event_add, /* must have */
+       .del            = cstate_pmu_event_del, /* must have */
+       .start          = cstate_pmu_event_start,
+       .stop           = cstate_pmu_event_stop,
+       .read           = cstate_pmu_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static struct pmu cstate_pkg_pmu = {
+       .attr_groups    = pkg_attr_groups,
+       .name           = "cstate_pkg",
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = cstate_pmu_event_init,
+       .add            = cstate_pmu_event_add, /* must have */
+       .del            = cstate_pmu_event_del, /* must have */
+       .start          = cstate_pmu_event_start,
+       .stop           = cstate_pmu_event_stop,
+       .read           = cstate_pmu_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static void __init cstate_pmus_register(void)
+{
+       int err;
+
+       if (has_cstate_core) {
+               err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
+               if (WARN_ON(err))
+                       pr_info("Failed to register PMU %s error %d\n",
+                               cstate_core_pmu.name, err);
+       }
+
+       if (has_cstate_pkg) {
+               err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+               if (WARN_ON(err))
+                       pr_info("Failed to register PMU %s error %d\n",
+                               cstate_pkg_pmu.name, err);
+       }
+}
+
+static int __init cstate_pmu_init(void)
+{
+       int err;
+
+       if (cpu_has_hypervisor)
+               return -ENODEV;
+
+       err = cstate_init();
+       if (err)
+               return err;
+
+       cstate_cpumask_init();
+
+       cstate_pmus_register();
+
+       return 0;
+}
+
+device_initcall(cstate_pmu_init);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
new file mode 100644 (file)
index 0000000..ce7211a
--- /dev/null
@@ -0,0 +1,1410 @@
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE                24
+
+#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
+#define PEBS_FIXUP_SIZE                PAGE_SIZE
+
+/*
+ * pebs_record_32 for P4 and Core is not supported
+
+struct pebs_record_32 {
+       u32 flags, ip;
+       u32 ax, bx, cx, dx;
+       u32 si, di, bp, sp;
+};
+
+ */
+
+union intel_x86_pebs_dse {
+       u64 val;
+       struct {
+               unsigned int ld_dse:4;
+               unsigned int ld_stlb_miss:1;
+               unsigned int ld_locked:1;
+               unsigned int ld_reserved:26;
+       };
+       struct {
+               unsigned int st_l1d_hit:1;
+               unsigned int st_reserved1:3;
+               unsigned int st_stlb_miss:1;
+               unsigned int st_locked:1;
+               unsigned int st_reserved2:26;
+       };
+};
+
+
+/*
+ * Map PEBS Load Latency Data Source encodings to generic
+ * memory data source information
+ */
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+/* Version for Sandy Bridge and later */
+static u64 pebs_data_source[] = {
+       P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00: unknown L3 */
+       OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
+       OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
+       OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
+       OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
+       OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
+       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
+       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+       OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
+       OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
+       OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
+       OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
+       OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
+       OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+/* Patch up minor differences in the bits */
+void __init intel_pmu_pebs_data_source_nhm(void)
+{
+       pebs_data_source[0x05] = OP_LH | P(LVL, L3)  | P(SNOOP, HIT);
+       pebs_data_source[0x06] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+       pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
+}
+
+static u64 precise_store_data(u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+       dse.val = status;
+
+       /*
+        * bit 4: TLB access
+        * 1 = store missed the 2nd level TLB
+        *
+        * If it missed, the translation came from the page walker or the OS;
+        * otherwise it hit the 2nd level TLB.
+        */
+       if (dse.st_stlb_miss)
+               val |= P(TLB, MISS);
+       else
+               val |= P(TLB, HIT);
+
+       /*
+        * bit 0: hit L1 data cache
+        * if not set, then all we know is that
+        * it missed L1D
+        */
+       if (dse.st_l1d_hit)
+               val |= P(LVL, HIT);
+       else
+               val |= P(LVL, MISS);
+
+       /*
+        * bit 5: Locked prefix
+        */
+       if (dse.st_locked)
+               val |= P(LOCK, LOCKED);
+
+       return val;
+}
+
+static u64 precise_datala_hsw(struct perf_event *event, u64 status)
+{
+       union perf_mem_data_src dse;
+
+       dse.val = PERF_MEM_NA;
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+               dse.mem_op = PERF_MEM_OP_STORE;
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
+               dse.mem_op = PERF_MEM_OP_LOAD;
+
+       /*
+        * L1 info only valid for following events:
+        *
+        * MEM_UOPS_RETIRED.STLB_MISS_STORES
+        * MEM_UOPS_RETIRED.LOCK_STORES
+        * MEM_UOPS_RETIRED.SPLIT_STORES
+        * MEM_UOPS_RETIRED.ALL_STORES
+        */
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
+               if (status & 1)
+                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+               else
+                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+       }
+       return dse.val;
+}
+
+static u64 load_latency_data(u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val;
+       int model = boot_cpu_data.x86_model;
+       int fam = boot_cpu_data.x86;
+
+       dse.val = status;
+
+       /*
+        * use the mapping table for bit 0-3
+        */
+       val = pebs_data_source[dse.ld_dse];
+
+       /*
+        * Nehalem models do not provide TLB or lock information
+        */
+       if (fam == 0x6 && (model == 26 || model == 30
+           || model == 31 || model == 46)) {
+               val |= P(TLB, NA) | P(LOCK, NA);
+               return val;
+       }
+       /*
+        * bit 4: TLB access
+        * 0 = did not miss 2nd level TLB
+        * 1 = missed 2nd level TLB
+        */
+       if (dse.ld_stlb_miss)
+               val |= P(TLB, MISS) | P(TLB, L2);
+       else
+               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+       /*
+        * bit 5: locked prefix
+        */
+       if (dse.ld_locked)
+               val |= P(LOCK, LOCKED);
+
+       return val;
+}
+
+struct pebs_record_core {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+};
+
+struct pebs_record_nhm {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+};
+
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+       struct {
+               u32 cycles_last_block     : 32,
+                   hle_abort             : 1,
+                   rtm_abort             : 1,
+                   instruction_abort     : 1,
+                   non_instruction_abort : 1,
+                   retry                 : 1,
+                   data_conflict         : 1,
+                   capacity_writes       : 1,
+                   capacity_reads        : 1;
+       };
+       u64         value;
+};
+
+#define PEBS_HSW_TSX_FLAGS     0xff00000000ULL
+
+/* Same as HSW, plus TSC */
+
+struct pebs_record_skl {
+       u64 flags, ip;
+       u64 ax, bx, cx, dx;
+       u64 si, di, bp, sp;
+       u64 r8,  r9,  r10, r11;
+       u64 r12, r13, r14, r15;
+       u64 status, dla, dse, lat;
+       u64 real_ip, tsx_tuning;
+       u64 tsc;
+};
+
+void init_debug_store_on_cpu(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds)
+               return;
+
+       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+                    (u32)((u64)(unsigned long)ds),
+                    (u32)((u64)(unsigned long)ds >> 32));
+}
+
+void fini_debug_store_on_cpu(int cpu)
+{
+       if (!per_cpu(cpu_hw_events, cpu).ds)
+               return;
+
+       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static DEFINE_PER_CPU(void *, insn_buffer);
+
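+/*
+ * Allocate the per-CPU PEBS buffer and, on parts that need the IP fixup
+ * (PEBS format < 2), a scratch buffer for copying user-space instructions
+ * while decoding.
+ */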
+static int alloc_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max;
+       void *buffer, *ibuffer;
+
+       if (!x86_pmu.pebs)
+               return 0;
+
+       buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+       if (unlikely(!buffer))
+               return -ENOMEM;
+
+       /*
+        * HSW+ already provides us with the eventing IP, so there is no need
+        * to allocate this buffer.
+        */
+       if (x86_pmu.intel_cap.pebs_format < 2) {
+               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+               if (!ibuffer) {
+                       kfree(buffer);
+                       return -ENOMEM;
+               }
+               per_cpu(insn_buffer, cpu) = ibuffer;
+       }
+
+       max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
+
+       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+       ds->pebs_index = ds->pebs_buffer_base;
+       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+               max * x86_pmu.pebs_record_size;
+
+       return 0;
+}
+
+static void release_pebs_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.pebs)
+               return;
+
+       kfree(per_cpu(insn_buffer, cpu));
+       per_cpu(insn_buffer, cpu) = NULL;
+
+       kfree((void *)(unsigned long)ds->pebs_buffer_base);
+       ds->pebs_buffer_base = 0;
+}
+
+static int alloc_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       int node = cpu_to_node(cpu);
+       int max, thresh;
+       void *buffer;
+
+       if (!x86_pmu.bts)
+               return 0;
+
+       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+       if (unlikely(!buffer)) {
+               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+               return -ENOMEM;
+       }
+
+       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+       thresh = max / 16;
+
+       ds->bts_buffer_base = (u64)(unsigned long)buffer;
+       ds->bts_index = ds->bts_buffer_base;
+       ds->bts_absolute_maximum = ds->bts_buffer_base +
+               max * BTS_RECORD_SIZE;
+       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+               thresh * BTS_RECORD_SIZE;
+
+       return 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds || !x86_pmu.bts)
+               return;
+
+       kfree((void *)(unsigned long)ds->bts_buffer_base);
+       ds->bts_buffer_base = 0;
+}
+
+static int alloc_ds_buffer(int cpu)
+{
+       int node = cpu_to_node(cpu);
+       struct debug_store *ds;
+
+       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+       if (unlikely(!ds))
+               return -ENOMEM;
+
+       per_cpu(cpu_hw_events, cpu).ds = ds;
+
+       return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+       if (!ds)
+               return;
+
+       per_cpu(cpu_hw_events, cpu).ds = NULL;
+       kfree(ds);
+}
+
+void release_ds_buffers(void)
+{
+       int cpu;
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               fini_debug_store_on_cpu(cpu);
+
+       for_each_possible_cpu(cpu) {
+               release_pebs_buffer(cpu);
+               release_bts_buffer(cpu);
+               release_ds_buffer(cpu);
+       }
+       put_online_cpus();
+}
+
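+/*
+ * Allocate DS, BTS and PEBS buffers for every possible CPU. If either the
+ * BTS or the PEBS allocations fail, that facility is torn down again and
+ * left inactive while the other one may still be used.
+ */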
+void reserve_ds_buffers(void)
+{
+       int bts_err = 0, pebs_err = 0;
+       int cpu;
+
+       x86_pmu.bts_active = 0;
+       x86_pmu.pebs_active = 0;
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       if (!x86_pmu.bts)
+               bts_err = 1;
+
+       if (!x86_pmu.pebs)
+               pebs_err = 1;
+
+       get_online_cpus();
+
+       for_each_possible_cpu(cpu) {
+               if (alloc_ds_buffer(cpu)) {
+                       bts_err = 1;
+                       pebs_err = 1;
+               }
+
+               if (!bts_err && alloc_bts_buffer(cpu))
+                       bts_err = 1;
+
+               if (!pebs_err && alloc_pebs_buffer(cpu))
+                       pebs_err = 1;
+
+               if (bts_err && pebs_err)
+                       break;
+       }
+
+       if (bts_err) {
+               for_each_possible_cpu(cpu)
+                       release_bts_buffer(cpu);
+       }
+
+       if (pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_pebs_buffer(cpu);
+       }
+
+       if (bts_err && pebs_err) {
+               for_each_possible_cpu(cpu)
+                       release_ds_buffer(cpu);
+       } else {
+               if (x86_pmu.bts && !bts_err)
+                       x86_pmu.bts_active = 1;
+
+               if (x86_pmu.pebs && !pebs_err)
+                       x86_pmu.pebs_active = 1;
+
+               for_each_online_cpu(cpu)
+                       init_debug_store_on_cpu(cpu);
+       }
+
+       put_online_cpus();
+}
+
+/*
+ * BTS
+ */
+
+struct event_constraint bts_constraint =
+       EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
+
+void intel_pmu_enable_bts(u64 config)
+{
+       unsigned long debugctlmsr;
+
+       debugctlmsr = get_debugctlmsr();
+
+       debugctlmsr |= DEBUGCTLMSR_TR;
+       debugctlmsr |= DEBUGCTLMSR_BTS;
+       if (config & ARCH_PERFMON_EVENTSEL_INT)
+               debugctlmsr |= DEBUGCTLMSR_BTINT;
+
+       if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
+
+       if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
+
+       update_debugctlmsr(debugctlmsr);
+}
+
+void intel_pmu_disable_bts(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       unsigned long debugctlmsr;
+
+       if (!cpuc->ds)
+               return;
+
+       debugctlmsr = get_debugctlmsr();
+
+       debugctlmsr &=
+               ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
+                 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
+
+       update_debugctlmsr(debugctlmsr);
+}
+
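+/*
+ * Convert the raw branch records in the BTS buffer into perf samples,
+ * skipping records that touch kernel addresses when the event excludes
+ * kernel tracing.
+ */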
+int intel_pmu_drain_bts_buffer(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct bts_record {
+               u64     from;
+               u64     to;
+               u64     flags;
+       };
+       struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
+       struct bts_record *at, *base, *top;
+       struct perf_output_handle handle;
+       struct perf_event_header header;
+       struct perf_sample_data data;
+       unsigned long skip = 0;
+       struct pt_regs regs;
+
+       if (!event)
+               return 0;
+
+       if (!x86_pmu.bts_active)
+               return 0;
+
+       base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+       top  = (struct bts_record *)(unsigned long)ds->bts_index;
+
+       if (top <= base)
+               return 0;
+
+       memset(&regs, 0, sizeof(regs));
+
+       ds->bts_index = ds->bts_buffer_base;
+
+       perf_sample_data_init(&data, 0, event->hw.last_period);
+
+       /*
+        * BTS leaks kernel addresses in branches across the cpl boundary,
+        * such as traps or system calls, so unless the user is asking for
+        * kernel tracing (and right now it's not possible), we'd need to
+        * filter them out. But first we need to count how many of those we
+        * have in the current batch. This is an extra O(n) pass, however,
+        * it's much faster than the other one especially considering that
+        * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the
+        * alloc_bts_buffer()).
+        */
+       for (at = base; at < top; at++) {
+               /*
+                * Note that right now *this* BTS code only works if
+                * attr::exclude_kernel is set, but let's keep this extra
+                * check here in case that changes.
+                */
+               if (event->attr.exclude_kernel &&
+                   (kernel_ip(at->from) || kernel_ip(at->to)))
+                       skip++;
+       }
+
+       /*
+        * Prepare a generic sample, i.e. fill in the invariant fields.
+        * We will overwrite the from and to address before we output
+        * the sample.
+        */
+       perf_prepare_sample(&header, &data, event, &regs);
+
+       if (perf_output_begin(&handle, event, header.size *
+                             (top - base - skip)))
+               return 1;
+
+       for (at = base; at < top; at++) {
+               /* Filter out any records that contain kernel addresses. */
+               if (event->attr.exclude_kernel &&
+                   (kernel_ip(at->from) || kernel_ip(at->to)))
+                       continue;
+
+               data.ip         = at->from;
+               data.addr       = at->to;
+
+               perf_output_sample(&handle, &header, &data, event);
+       }
+
+       perf_output_end(&handle);
+
+       /* There's new data available. */
+       event->hw.interrupts++;
+       event->pending_kill = POLL_IN;
+       return 1;
+}
+
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+       struct pt_regs regs;
+
+       x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       if (!sched_in)
+               intel_pmu_drain_pebs_buffer();
+}
+
+/*
+ * PEBS
+ */
+struct event_constraint intel_core2_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_atom_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
+       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_westmere_pebs_event_constraints[] = {
+       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
+       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_snb_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_bdw_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
+       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint intel_skl_pebs_event_constraints[] = {
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
+       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
+       /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
+       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
+       INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
+       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
+       /* Allow all events as PEBS with no flags */
+       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       if (!event->attr.precise_ip)
+               return NULL;
+
+       if (x86_pmu.pebs_constraints) {
+               for_each_event_constraint(c, x86_pmu.pebs_constraints) {
+                       if ((event->hw.config & c->cmask) == c->code) {
+                               event->hw.flags |= c->flags;
+                               return c;
+                       }
+               }
+       }
+
+       return &emptyconstraint;
+}
+
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
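+/*
+ * Enable PEBS for the event's counter and pick the interrupt threshold:
+ * a single record for the classic behaviour, or a nearly full buffer when
+ * the event qualifies for free-running multi-record PEBS.
+ */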
+void intel_pmu_pebs_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
+       bool first_pebs;
+       u64 threshold;
+
+       hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+
+       first_pebs = !pebs_is_enabled(cpuc);
+       cpuc->pebs_enabled |= 1ULL << hwc->idx;
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+               cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+               cpuc->pebs_enabled |= 1ULL << 63;
+
+       /*
+        * When the event is constrained enough we can use a larger
+        * threshold and run the event with less frequent PMI.
+        */
+       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+               threshold = ds->pebs_absolute_maximum -
+                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+               if (first_pebs)
+                       perf_sched_cb_inc(event->ctx->pmu);
+       } else {
+               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+               /*
+                * If not all events can use the larger buffer,
+                * fall back to a threshold of a single record.
+                */
+               if (!first_pebs &&
+                   (ds->pebs_interrupt_threshold > threshold))
+                       perf_sched_cb_dec(event->ctx->pmu);
+       }
+
+       /* Use auto-reload if possible to save a MSR write in the PMI */
+       if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+               ds->pebs_event_reset[hwc->idx] =
+                       (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+       }
+
+       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+               ds->pebs_interrupt_threshold = threshold;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       struct debug_store *ds = cpuc->ds;
+       bool large_pebs = ds->pebs_interrupt_threshold >
+               ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+       if (large_pebs)
+               intel_pmu_drain_pebs_buffer();
+
+       cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+               cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+               cpuc->pebs_enabled &= ~(1ULL << 63);
+
+       if (large_pebs && !pebs_is_enabled(cpuc))
+               perf_sched_cb_dec(event->ctx->pmu);
+
+       if (cpuc->enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+       hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_enable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->pebs_enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+}
+
+void intel_pmu_pebs_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->pebs_enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
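+/*
+ * With trap-like PEBS the reported IP is that of the instruction *after*
+ * the one that triggered the event.  Starting from the last LBR branch
+ * target, decode forward through the basic block until the reported IP is
+ * reached and rewrite regs->ip to the start of the preceding instruction.
+ */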
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       unsigned long from = cpuc->lbr_entries[0].from;
+       unsigned long old_to, to = cpuc->lbr_entries[0].to;
+       unsigned long ip = regs->ip;
+       int is_64bit = 0;
+       void *kaddr;
+       int size;
+
+       /*
+        * We don't need to fix up the IP if the PEBS assist is fault-like
+        */
+       if (!x86_pmu.intel_cap.pebs_trap)
+               return 1;
+
+       /*
+        * No LBR entry, no basic block, no rewinding
+        */
+       if (!cpuc->lbr_stack.nr || !from || !to)
+               return 0;
+
+       /*
+        * Basic blocks should never cross user/kernel boundaries
+        */
+       if (kernel_ip(ip) != kernel_ip(to))
+               return 0;
+
+       /*
+        * unsigned math, either ip is before the start (impossible) or
+        * the basic block is larger than 1 page (sanity)
+        */
+       if ((ip - to) > PEBS_FIXUP_SIZE)
+               return 0;
+
+       /*
+        * We sampled a branch insn, rewind using the LBR stack
+        */
+       if (ip == to) {
+               set_linear_ip(regs, from);
+               return 1;
+       }
+
+       size = ip - to;
+       if (!kernel_ip(ip)) {
+               int bytes;
+               u8 *buf = this_cpu_read(insn_buffer);
+
+               /* 'size' must fit our buffer, see above */
+               bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+               if (bytes != 0)
+                       return 0;
+
+               kaddr = buf;
+       } else {
+               kaddr = (void *)to;
+       }
+
+       do {
+               struct insn insn;
+
+               old_to = to;
+
+#ifdef CONFIG_X86_64
+               is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+               insn_init(&insn, kaddr, size, is_64bit);
+               insn_get_length(&insn);
+               /*
+                * Make sure there was not a problem decoding the
+                * instruction and getting the length.  This is
+                * doubly important because we have an infinite
+                * loop if insn.length=0.
+                */
+               if (!insn.length)
+                       break;
+
+               to += insn.length;
+               kaddr += insn.length;
+               size -= insn.length;
+       } while (to < ip);
+
+       if (to == ip) {
+               set_linear_ip(regs, old_to);
+               return 1;
+       }
+
+       /*
+        * Even though we decoded the basic block, the instruction stream
+        * never matched the given IP, either the TO or the IP got corrupted.
+        */
+       return 0;
+}
+
+static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+{
+       if (pebs->tsx_tuning) {
+               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+               return tsx.cycles_last_block;
+       }
+       return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+{
+       u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+       /* For RTM XABORTs also log the abort code from AX */
+       if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+               txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+       return txn;
+}
+
+static void setup_pebs_sample_data(struct perf_event *event,
+                                  struct pt_regs *iregs, void *__pebs,
+                                  struct perf_sample_data *data,
+                                  struct pt_regs *regs)
+{
+#define PERF_X86_EVENT_PEBS_HSW_PREC \
+               (PERF_X86_EVENT_PEBS_ST_HSW | \
+                PERF_X86_EVENT_PEBS_LD_HSW | \
+                PERF_X86_EVENT_PEBS_NA_HSW)
+       /*
+        * We cast to the biggest pebs_record but are careful not to
+        * unconditionally access the 'extra' entries.
+        */
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct pebs_record_skl *pebs = __pebs;
+       u64 sample_type;
+       int fll, fst, dsrc;
+       int fl = event->hw.flags;
+
+       if (pebs == NULL)
+               return;
+
+       sample_type = event->attr.sample_type;
+       dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
+
+       fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
+       fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+       perf_sample_data_init(data, 0, event->hw.last_period);
+
+       data->period = event->hw.last_period;
+
+       /*
+        * Use latency for weight (only avail with PEBS-LL)
+        */
+       if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
+               data->weight = pebs->lat;
+
+       /*
+        * data.data_src encodes the data source
+        */
+       if (dsrc) {
+               u64 val = PERF_MEM_NA;
+               if (fll)
+                       val = load_latency_data(pebs->dse);
+               else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+                       val = precise_datala_hsw(event, pebs->dse);
+               else if (fst)
+                       val = precise_store_data(pebs->dse);
+               data->data_src.val = val;
+       }
+
+       /*
+        * We use the interrupt regs as a base because the PEBS record
+        * does not contain a full regs set, specifically it seems to
+        * lack segment descriptors, which get used by things like
+        * user_mode().
+        *
+        * In the simple case fix up only the IP and BP,SP regs, for
+        * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
+        * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+        */
+       *regs = *iregs;
+       regs->flags = pebs->flags;
+       set_linear_ip(regs, pebs->ip);
+       regs->bp = pebs->bp;
+       regs->sp = pebs->sp;
+
+       if (sample_type & PERF_SAMPLE_REGS_INTR) {
+               regs->ax = pebs->ax;
+               regs->bx = pebs->bx;
+               regs->cx = pebs->cx;
+               regs->dx = pebs->dx;
+               regs->si = pebs->si;
+               regs->di = pebs->di;
+               regs->bp = pebs->bp;
+               regs->sp = pebs->sp;
+
+               regs->flags = pebs->flags;
+#ifndef CONFIG_X86_32
+               regs->r8 = pebs->r8;
+               regs->r9 = pebs->r9;
+               regs->r10 = pebs->r10;
+               regs->r11 = pebs->r11;
+               regs->r12 = pebs->r12;
+               regs->r13 = pebs->r13;
+               regs->r14 = pebs->r14;
+               regs->r15 = pebs->r15;
+#endif
+       }
+
+       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+               regs->ip = pebs->real_ip;
+               regs->flags |= PERF_EFLAGS_EXACT;
+       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+               regs->flags |= PERF_EFLAGS_EXACT;
+       else
+               regs->flags &= ~PERF_EFLAGS_EXACT;
+
+       if ((sample_type & PERF_SAMPLE_ADDR) &&
+           x86_pmu.intel_cap.pebs_format >= 1)
+               data->addr = pebs->dla;
+
+       if (x86_pmu.intel_cap.pebs_format >= 2) {
+               /* Only set the TSX weight when no memory weight. */
+               if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+                       data->weight = intel_hsw_weight(pebs);
+
+               if (sample_type & PERF_SAMPLE_TRANSACTION)
+                       data->txn = intel_hsw_transaction(pebs);
+       }
+
+       /*
+        * PEBS v3 supplies an accurate timestamp, so we use it
+        * as the sample time.
+        *
+        * We can only do this for the default trace clock.
+        */
+       if (x86_pmu.intel_cap.pebs_format >= 3 &&
+               event->attr.use_clockid == 0)
+               data->time = native_sched_clock_from_tsc(pebs->tsc);
+
+       if (has_branch_stack(event))
+               data->br_stack = &cpuc->lbr_stack;
+}
+
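+/*
+ * Scan the PEBS buffer from @base for the next record whose status field
+ * unambiguously attributes it to the counter @bit.
+ */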
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       void *at;
+       u64 pebs_status;
+
+       /*
+        * fmt0 does not have a status bitfield (does not use the
+        * pebs_record_nhm format)
+        */
+       if (x86_pmu.intel_cap.pebs_format < 1)
+               return base;
+
+       if (base == NULL)
+               return NULL;
+
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+               struct pebs_record_nhm *p = at;
+
+               if (test_bit(bit, (unsigned long *)&p->status)) {
+                       /* PEBS v3 has accurate status bits */
+                       if (x86_pmu.intel_cap.pebs_format >= 3)
+                               return at;
+
+                       if (p->status == (1 << bit))
+                               return at;
+
+                       /* clear non-PEBS bit and re-check */
+                       pebs_status = p->status & cpuc->pebs_enabled;
+                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+                       if (pebs_status == (1 << bit))
+                               return at;
+               }
+       }
+       return NULL;
+}
+
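+/*
+ * Emit @count samples for @event from the PEBS buffer.  All but the last
+ * record are output directly; the final one goes through the regular
+ * overflow path so that period updates and throttling still apply.
+ */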
+static void __intel_pmu_pebs_event(struct perf_event *event,
+                                  struct pt_regs *iregs,
+                                  void *base, void *top,
+                                  int bit, int count)
+{
+       struct perf_sample_data data;
+       struct pt_regs regs;
+       void *at = get_next_pebs_record_by_bit(base, top, bit);
+
+       if (!intel_pmu_save_and_restart(event) &&
+           !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+               return;
+
+       while (count > 1) {
+               setup_pebs_sample_data(event, iregs, at, &data, &regs);
+               perf_event_output(event, &data, &regs);
+               at += x86_pmu.pebs_record_size;
+               at = get_next_pebs_record_by_bit(at, top, bit);
+               count--;
+       }
+
+       setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+       /*
+        * All but the last record were processed above.
+        * The last one is left so that the overflow handler can be called.
+        */
+       if (perf_event_overflow(event, &data, &regs)) {
+               x86_pmu_stop(event, 0);
+               return;
+       }
+
+}
+
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+       struct pebs_record_core *at, *top;
+       int n;
+
+       if (!x86_pmu.pebs_active)
+               return;
+
+       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+
+       /*
+        * Whatever else happens, drain the thing
+        */
+       ds->pebs_index = ds->pebs_buffer_base;
+
+       if (!test_bit(0, cpuc->active_mask))
+               return;
+
+       WARN_ON_ONCE(!event);
+
+       if (!event->attr.precise_ip)
+               return;
+
+       n = top - at;
+       if (n <= 0)
+               return;
+
+       __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+}
+
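+/*
+ * Drain the PEBS buffer for the NHM and later record formats: a first pass
+ * attributes each record to a counter (or marks it as a collision), a
+ * second pass emits the samples and logs how many records were dropped.
+ */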
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct perf_event *event;
+       void *base, *at, *top;
+       short counts[MAX_PEBS_EVENTS] = {};
+       short error[MAX_PEBS_EVENTS] = {};
+       int bit, i;
+
+       if (!x86_pmu.pebs_active)
+               return;
+
+       base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+       ds->pebs_index = ds->pebs_buffer_base;
+
+       if (unlikely(base >= top))
+               return;
+
+       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+               struct pebs_record_nhm *p = at;
+               u64 pebs_status;
+
+               /* PEBS v3 has accurate status bits */
+               if (x86_pmu.intel_cap.pebs_format >= 3) {
+                       for_each_set_bit(bit, (unsigned long *)&p->status,
+                                        MAX_PEBS_EVENTS)
+                               counts[bit]++;
+
+                       continue;
+               }
+
+               pebs_status = p->status & cpuc->pebs_enabled;
+               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+               /*
+                * On some CPUs the PEBS status can be zero when PEBS is
+                * racing with clearing of GLOBAL_STATUS.
+                *
+                * Normally we would drop that record, but in the
+                * case when there is only a single active PEBS event
+                * we can assume it's for that event.
+                */
+               if (!pebs_status && cpuc->pebs_enabled &&
+                       !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
+                       pebs_status = cpuc->pebs_enabled;
+
+               bit = find_first_bit((unsigned long *)&pebs_status,
+                                       x86_pmu.max_pebs_events);
+               if (bit >= x86_pmu.max_pebs_events)
+                       continue;
+
+               /*
+                * The PEBS hardware does not deal well with the situation
+                * when events happen near to each other and multiple bits
+                * are set. But it should happen rarely.
+                *
+                * If these events include one PEBS and multiple non-PEBS
+                * events, it doesn't impact PEBS record. The record will
+                * be handled normally. (slow path)
+                *
+                * If these events include two or more PEBS events, the
+                * records for the events can be collapsed into a single
+                * one, and it's not possible to reconstruct all the events
+                * that caused the PEBS record.  This is called a collision.
+                * If a collision happens, the record is dropped.
+                */
+               if (p->status != (1ULL << bit)) {
+                       for_each_set_bit(i, (unsigned long *)&pebs_status,
+                                        x86_pmu.max_pebs_events)
+                               error[i]++;
+                       continue;
+               }
+
+               counts[bit]++;
+       }
+
+       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+               if ((counts[bit] == 0) && (error[bit] == 0))
+                       continue;
+
+               event = cpuc->events[bit];
+               WARN_ON_ONCE(!event);
+               WARN_ON_ONCE(!event->attr.precise_ip);
+
+               /* log dropped samples number */
+               if (error[bit])
+                       perf_log_lost_samples(event, error[bit]);
+
+               if (counts[bit]) {
+                       __intel_pmu_pebs_event(event, iregs, base,
+                                              top, bit, counts[bit]);
+               }
+       }
+}
+
+/*
+ * BTS, PEBS probe and setup
+ */
+
+void __init intel_ds_init(void)
+{
+       /*
+        * No support for 32bit formats
+        */
+       if (!boot_cpu_has(X86_FEATURE_DTES64))
+               return;
+
+       x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
+       x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+       x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+       if (x86_pmu.pebs) {
+               char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
+               int format = x86_pmu.intel_cap.pebs_format;
+
+               switch (format) {
+               case 0:
+                       pr_cont("PEBS fmt0%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+                       /*
+                        * Using >PAGE_SIZE buffers makes the WRMSR to
+                        * PERF_GLOBAL_CTRL in intel_pmu_enable_all()
+                        * mysteriously hang on Core2.
+                        *
+                        * As a workaround, we don't do this.
+                        */
+                       x86_pmu.pebs_buffer_size = PAGE_SIZE;
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
+                       break;
+
+               case 1:
+                       pr_cont("PEBS fmt1%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       break;
+
+               case 2:
+                       pr_cont("PEBS fmt2%c, ", pebs_type);
+                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       break;
+
+               case 3:
+                       pr_cont("PEBS fmt3%c, ", pebs_type);
+                       x86_pmu.pebs_record_size =
+                                               sizeof(struct pebs_record_skl);
+                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+                       x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+                       break;
+
+               default:
+                       pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
+                       x86_pmu.pebs = 0;
+               }
+       }
+}
+
+void perf_restore_debug_store(void)
+{
+       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+
+       if (!x86_pmu.bts && !x86_pmu.pebs)
+               return;
+
+       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+}
diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c
new file mode 100644 (file)
index 0000000..548d5f7
--- /dev/null
@@ -0,0 +1,321 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/hardirq.h>
+
+#include "../perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]           = 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]         = 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x002b,
+};
+
+static const u64 __initconst knc_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               /* On Xeon Phi event "0" is a valid DATA_READ          */
+               /*   (L1 Data Cache Reads) Instruction.                */
+               /* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
+               /* bit will always be set in x86_pmu_hw_config().      */
+               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                                               /* DATA_READ           */
+               [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS      */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE          */
+               [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS     */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1         */
+               [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS    */
+       },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ          */
+               [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS    */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2      */
+               [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+                                               /* DATA_READ */
+                                               /* see note on L1 OP_READ */
+               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
+               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = 0x0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
+               [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
+               [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+       return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+       INTEL_EVENT_CONSTRAINT(0xc3, 0x1),      /* HWP_L2HIT */
+       INTEL_EVENT_CONSTRAINT(0xc4, 0x1),      /* HWP_L2MISS */
+       INTEL_EVENT_CONSTRAINT(0xc8, 0x1),      /* L2_READ_HIT_E */
+       INTEL_EVENT_CONSTRAINT(0xc9, 0x1),      /* L2_READ_HIT_M */
+       INTEL_EVENT_CONSTRAINT(0xca, 0x1),      /* L2_READ_HIT_S */
+       INTEL_EVENT_CONSTRAINT(0xcb, 0x1),      /* L2_READ_MISS */
+       INTEL_EVENT_CONSTRAINT(0xcc, 0x1),      /* L2_WRITE_HIT */
+       INTEL_EVENT_CONSTRAINT(0xce, 0x1),      /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+       INTEL_EVENT_CONSTRAINT(0xcf, 0x1),      /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+       INTEL_EVENT_CONSTRAINT(0xd7, 0x1),      /* L2_VICTIM_REQ_WITH_DATA */
+       INTEL_EVENT_CONSTRAINT(0xe3, 0x1),      /* SNP_HITM_BUNIT */
+       INTEL_EVENT_CONSTRAINT(0xe6, 0x1),      /* SNP_HIT_L2 */
+       INTEL_EVENT_CONSTRAINT(0xe7, 0x1),      /* SNP_HITM_L2 */
+       INTEL_EVENT_CONSTRAINT(0xf1, 0x1),      /* L2_DATA_READ_MISS_CACHE_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf2, 0x1),      /* L2_DATA_WRITE_MISS_CACHE_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf6, 0x1),      /* L2_DATA_READ_MISS_MEM_FILL */
+       INTEL_EVENT_CONSTRAINT(0xf7, 0x1),      /* L2_DATA_WRITE_MISS_MEM_FILL */
+       INTEL_EVENT_CONSTRAINT(0xfc, 0x1),      /* L2_DATA_PF2 */
+       INTEL_EVENT_CONSTRAINT(0xfd, 0x1),      /* L2_DATA_PF2_DROP */
+       INTEL_EVENT_CONSTRAINT(0xfe, 0x1),      /* L2_DATA_PF2_MISS */
+       INTEL_EVENT_CONSTRAINT(0xff, 0x1),      /* L2_DATA_HIT_INFLIGHT_PF2 */
+       EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS                0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL   0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL          0x0000002f
+
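+/* Per-counter enable bits in MSR_KNC_IA32_PERF_GLOBAL_CTRL */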
+#define KNC_ENABLE_COUNTER0                    0x00000001
+#define KNC_ENABLE_COUNTER1                    0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+       u64 val;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+       val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+       u64 val;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+       val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static inline u64 knc_pmu_get_status(void)
+{
+       u64 status;
+
+       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+       return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       int handled = 0;
+       int bit, loops;
+       u64 status;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       knc_pmu_disable_all();
+
+       status = knc_pmu_get_status();
+       if (!status) {
+               knc_pmu_enable_all(0);
+               return handled;
+       }
+
+       loops = 0;
+again:
+       knc_pmu_ack_status(status);
+       if (++loops > 100) {
+               WARN_ONCE(1, "perf: irq loop stuck!\n");
+               perf_event_print_debug();
+               goto done;
+       }
+
+       inc_irq_stat(apic_perf_irqs);
+
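+       /* Each set bit in the status word corresponds to an overflowed counter */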
+       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+               struct perf_event *event = cpuc->events[bit];
+
+               handled++;
+
+               if (!test_bit(bit, cpuc->active_mask))
+                       continue;
+
+               if (!intel_pmu_save_and_restart(event))
+                       continue;
+
+               perf_sample_data_init(&data, 0, event->hw.last_period);
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       /*
+        * Repeat if there is more work to be done:
+        */
+       status = knc_pmu_get_status();
+       if (status)
+               goto again;
+
+done:
+       /* Only restore PMU state when it's active. See x86_pmu_disable(). */
+       if (cpuc->enabled)
+               knc_pmu_enable_all(0);
+
+       return handled;
+}
+
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *intel_knc_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+static const struct x86_pmu knc_pmu __initconst = {
+       .name                   = "knc",
+       .handle_irq             = knc_pmu_handle_irq,
+       .disable_all            = knc_pmu_disable_all,
+       .enable_all             = knc_pmu_enable_all,
+       .enable                 = knc_pmu_enable_event,
+       .disable                = knc_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_KNC_EVNTSEL0,
+       .perfctr                = MSR_KNC_PERFCTR0,
+       .event_map              = knc_pmu_event_map,
+       .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
+       .apic                   = 1,
+       .max_period             = (1ULL << 39) - 1,
+       .version                = 0,
+       .num_counters           = 2,
+       .cntval_bits            = 40,
+       .cntval_mask            = (1ULL << 40) - 1,
+       .get_event_constraints  = x86_get_event_constraints,
+       .event_constraints      = knc_event_constraints,
+       .format_attrs           = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+       x86_pmu = knc_pmu;
+
+       memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
+               sizeof(hw_cache_event_ids));
+
+       return 0;
+}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
new file mode 100644 (file)
index 0000000..69dd118
--- /dev/null
@@ -0,0 +1,1062 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/perf_event.h>
+#include <asm/msr.h>
+#include <asm/insn.h>
+
+#include "../perf_event.h"
+
+enum {
+       LBR_FORMAT_32           = 0x00,
+       LBR_FORMAT_LIP          = 0x01,
+       LBR_FORMAT_EIP          = 0x02,
+       LBR_FORMAT_EIP_FLAGS    = 0x03,
+       LBR_FORMAT_EIP_FLAGS2   = 0x04,
+       LBR_FORMAT_INFO         = 0x05,
+       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+};
+
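+/*
+ * Per-format description of the status flags (mispredict, TSX) that are
+ * encoded in the top bits of the LBR_FROM value and must be stripped
+ * out when the registers are read.
+ */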
+static enum {
+       LBR_EIP_FLAGS           = 1,
+       LBR_TSX                 = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+       [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
+       [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
+};
+
+/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT         0 /* do not capture at ring0 */
+#define LBR_USER_BIT           1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT            2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT       3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT       4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT         5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
+#define LBR_FAR_BIT            8 /* do not capture far branches */
+#define LBR_CALL_STACK_BIT     9 /* enable call stack */
+
+/*
+ * The following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the perf constraint code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT               63 /* don't read LBR_INFO. */
+
+#define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
+#define LBR_USER       (1 << LBR_USER_BIT)
+#define LBR_JCC                (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL   (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL   (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN     (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR                (1 << LBR_FAR_BIT)
+#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO    (1ULL << LBR_NO_INFO_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK   0x1ff   /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP   -1      /* LBR filter not supported */
+#define LBR_IGN                0       /* ignored */
+
+#define LBR_ANY                 \
+       (LBR_JCC        |\
+        LBR_REL_CALL   |\
+        LBR_IND_CALL   |\
+        LBR_RETURN     |\
+        LBR_REL_JMP    |\
+        LBR_IND_JMP    |\
+        LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps and faults
+ */
+enum {
+       X86_BR_NONE             = 0,      /* unknown */
+
+       X86_BR_USER             = 1 << 0, /* branch target is user */
+       X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
+
+       X86_BR_CALL             = 1 << 2, /* call */
+       X86_BR_RET              = 1 << 3, /* return */
+       X86_BR_SYSCALL          = 1 << 4, /* syscall */
+       X86_BR_SYSRET           = 1 << 5, /* syscall return */
+       X86_BR_INT              = 1 << 6, /* sw interrupt */
+       X86_BR_IRET             = 1 << 7, /* return from interrupt */
+       X86_BR_JCC              = 1 << 8, /* conditional */
+       X86_BR_JMP              = 1 << 9, /* jump */
+       X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
+       X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
+       X86_BR_ABORT            = 1 << 12,/* transaction abort */
+       X86_BR_IN_TX            = 1 << 13,/* in transaction */
+       X86_BR_NO_TX            = 1 << 14,/* not in transaction */
+       X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
+       X86_BR_CALL_STACK       = 1 << 16,/* call stack */
+       X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY       \
+       (X86_BR_CALL    |\
+        X86_BR_RET     |\
+        X86_BR_SYSCALL |\
+        X86_BR_SYSRET  |\
+        X86_BR_INT     |\
+        X86_BR_IRET    |\
+        X86_BR_JCC     |\
+        X86_BR_JMP      |\
+        X86_BR_IRQ      |\
+        X86_BR_ABORT    |\
+        X86_BR_IND_CALL |\
+        X86_BR_IND_JMP  |\
+        X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL                 \
+       (X86_BR_CALL            |\
+        X86_BR_IND_CALL        |\
+        X86_BR_ZERO_CALL       |\
+        X86_BR_SYSCALL         |\
+        X86_BR_IRQ             |\
+        X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
+ * otherwise it becomes nearly impossible to get a reliable stack.
+ */
+
+static void __intel_pmu_lbr_enable(bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       u64 debugctl, lbr_select = 0, orig_debugctl;
+
+       /*
+        * No need to unfreeze manually, as v4 can do that as part
+        * of the GLOBAL_STATUS ack.
+        */
+       if (pmi && x86_pmu.version >= 4)
+               return;
+
+       /*
+        * No need to reprogram LBR_SELECT in a PMI, as it
+        * did not change.
+        */
+       if (cpuc->lbr_sel)
+               lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+       if (!pmi && cpuc->lbr_sel)
+               wrmsrl(MSR_LBR_SELECT, lbr_select);
+
+       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       orig_debugctl = debugctl;
+       debugctl |= DEBUGCTLMSR_LBR;
+       /*
+        * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
+        * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
+        * may cause superfluous increase/decrease of LBR_TOS.
+        */
+       if (!(lbr_select & LBR_CALL_STACK))
+               debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+       if (orig_debugctl != debugctl)
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+       u64 debugctl;
+
+       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++)
+               wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               wrmsrl(x86_pmu.lbr_from + i, 0);
+               wrmsrl(x86_pmu.lbr_to   + i, 0);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       wrmsrl(MSR_LBR_INFO_0 + i, 0);
+       }
+}
+
+void intel_pmu_lbr_reset(void)
+{
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+               intel_pmu_lbr_reset_32();
+       else
+               intel_pmu_lbr_reset_64();
+}
+
+/*
+ * TOS = most recently recorded branch
+ */
+static inline u64 intel_pmu_lbr_tos(void)
+{
+       u64 tos;
+
+       rdmsrl(x86_pmu.lbr_tos, tos);
+       return tos;
+}
+
+enum {
+       LBR_NONE,
+       LBR_VALID,
+};
+
+static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0 ||
+           task_ctx->lbr_stack_state == LBR_NONE) {
+               intel_pmu_lbr_reset();
+               return;
+       }
+
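+       /* lbr_nr is a power of two, so the mask wraps the LBR ring-buffer index */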
+       mask = x86_pmu.lbr_nr - 1;
+       tos = task_ctx->tos;
+       for (i = 0; i < tos; i++) {
+               lbr_idx = (tos - i) & mask;
+               wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+       }
+       wrmsrl(x86_pmu.lbr_tos, tos);
+       task_ctx->lbr_stack_state = LBR_NONE;
+}
+
+static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
+{
+       int i;
+       unsigned lbr_idx, mask;
+       u64 tos;
+
+       if (task_ctx->lbr_callstack_users == 0) {
+               task_ctx->lbr_stack_state = LBR_NONE;
+               return;
+       }
+
+       mask = x86_pmu.lbr_nr - 1;
+       tos = intel_pmu_lbr_tos();
+       for (i = 0; i < tos; i++) {
+               lbr_idx = (tos - i) & mask;
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
+               rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
+       }
+       task_ctx->tos = tos;
+       task_ctx->lbr_stack_state = LBR_VALID;
+}
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       /*
+        * If LBR callstack feature is enabled and the stack was saved when
+        * the task was scheduled out, restore the stack. Otherwise flush
+        * the LBR stack.
+        */
+       task_ctx = ctx ? ctx->task_ctx_data : NULL;
+       if (task_ctx) {
+               if (sched_in) {
+                       __intel_pmu_lbr_restore(task_ctx);
+                       cpuc->lbr_context = ctx;
+               } else {
+                       __intel_pmu_lbr_save(task_ctx);
+               }
+               return;
+       }
+
+       /*
+        * When sampling the branch stack in system-wide mode, it may be
+        * necessary to flush the stack on context switch. This happens
+        * when the branch stack does not tag its entries with the pid
+        * of the current task. Otherwise it becomes impossible to
+        * associate a branch entry with a task. This ambiguity is more
+        * likely to appear when the branch stack supports priv level
+        * filtering and the user sets it to monitor only at the user
+        * level (which could be a useful measurement in system-wide
+        * mode). In that case, the risk is high of having a branch
+        * stack with branches from multiple tasks.
+        */
+       if (sched_in) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = ctx;
+       }
+}
+
+static inline bool branch_user_callstack(unsigned br_sel)
+{
+       return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
+}
+
+void intel_pmu_lbr_enable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       /*
+        * Reset the LBR stack if we changed task context to
+        * avoid data leaks.
+        */
+       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
+               intel_pmu_lbr_reset();
+               cpuc->lbr_context = event->ctx;
+       }
+       cpuc->br_sel = event->hw.branch_reg.reg;
+
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users++;
+       }
+
+       cpuc->lbr_users++;
+       perf_sched_cb_inc(event->ctx->pmu);
+}
+
+void intel_pmu_lbr_disable(struct perf_event *event)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       struct x86_perf_task_context *task_ctx;
+
+       if (!x86_pmu.lbr_nr)
+               return;
+
+       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
+                                       event->ctx->task_ctx_data) {
+               task_ctx = event->ctx->task_ctx_data;
+               task_ctx->lbr_callstack_users--;
+       }
+
+       cpuc->lbr_users--;
+       WARN_ON_ONCE(cpuc->lbr_users < 0);
+       perf_sched_cb_dec(event->ctx->pmu);
+
+       if (cpuc->enabled && !cpuc->lbr_users) {
+               __intel_pmu_lbr_disable();
+               /* avoid stale pointer */
+               cpuc->lbr_context = NULL;
+       }
+}
+
+void intel_pmu_lbr_enable_all(bool pmi)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               __intel_pmu_lbr_enable(pmi);
+}
+
+void intel_pmu_lbr_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (cpuc->lbr_users)
+               __intel_pmu_lbr_disable();
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+       unsigned long mask = x86_pmu.lbr_nr - 1;
+       u64 tos = intel_pmu_lbr_tos();
+       int i;
+
+       for (i = 0; i < x86_pmu.lbr_nr; i++) {
+               unsigned long lbr_idx = (tos - i) & mask;
+               union {
+                       struct {
+                               u32 from;
+                               u32 to;
+                       };
+                       u64     lbr;
+               } msr_lastbranch;
+
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
+               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
+               cpuc->lbr_entries[i].mispred    = 0;
+               cpuc->lbr_entries[i].predicted  = 0;
+               cpuc->lbr_entries[i].reserved   = 0;
+       }
+       cpuc->lbr_stack.nr = i;
+}
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+       bool need_info = false;
+       unsigned long mask = x86_pmu.lbr_nr - 1;
+       int lbr_format = x86_pmu.intel_cap.lbr_format;
+       u64 tos = intel_pmu_lbr_tos();
+       int i;
+       int out = 0;
+       int num = x86_pmu.lbr_nr;
+
+       if (cpuc->lbr_sel) {
+               need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
+               if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+                       num = tos;
+       }
+
+       for (i = 0; i < num; i++) {
+               unsigned long lbr_idx = (tos - i) & mask;
+               u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+               int skip = 0;
+               u16 cycles = 0;
+               int lbr_flags = lbr_desc[lbr_format];
+
+               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+
+               if (lbr_format == LBR_FORMAT_INFO && need_info) {
+                       u64 info;
+
+                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
+                       mis = !!(info & LBR_INFO_MISPRED);
+                       pred = !mis;
+                       in_tx = !!(info & LBR_INFO_IN_TX);
+                       abort = !!(info & LBR_INFO_ABORT);
+                       cycles = (info & LBR_INFO_CYCLES);
+               }
+               if (lbr_flags & LBR_EIP_FLAGS) {
+                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
+                       pred = !mis;
+                       skip = 1;
+               }
+               if (lbr_flags & LBR_TSX) {
+                       in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+                       abort = !!(from & LBR_FROM_FLAG_ABORT);
+                       skip = 3;
+               }
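+               /* Strip the flag bits and sign-extend the remaining address */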
+               from = (u64)((((s64)from) << skip) >> skip);
+
+               /*
+                * Some CPUs report duplicated abort records,
+                * with the second entry not having an abort bit set.
+                * Skip them here. This loop runs backwards,
+                * so we need to undo the previous record.
+                * If the abort just happened outside the window
+                * the extra entry cannot be removed.
+                */
+               if (abort && x86_pmu.lbr_double_abort && out > 0)
+                       out--;
+
+               cpuc->lbr_entries[out].from      = from;
+               cpuc->lbr_entries[out].to        = to;
+               cpuc->lbr_entries[out].mispred   = mis;
+               cpuc->lbr_entries[out].predicted = pred;
+               cpuc->lbr_entries[out].in_tx     = in_tx;
+               cpuc->lbr_entries[out].abort     = abort;
+               cpuc->lbr_entries[out].cycles    = cycles;
+               cpuc->lbr_entries[out].reserved  = 0;
+               out++;
+       }
+       cpuc->lbr_stack.nr = out;
+}
+
+void intel_pmu_lbr_read(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       if (!cpuc->lbr_users)
+               return;
+
+       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
+               intel_pmu_lbr_read_32(cpuc);
+       else
+               intel_pmu_lbr_read_64(cpuc);
+
+       intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+       u64 br_type = event->attr.branch_sample_type;
+       int mask = 0;
+
+       if (br_type & PERF_SAMPLE_BRANCH_USER)
+               mask |= X86_BR_USER;
+
+       if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+               mask |= X86_BR_KERNEL;
+
+       /* we ignore BRANCH_HV here */
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY)
+               mask |= X86_BR_ANY;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+               mask |= X86_BR_ANY_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+               mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+       if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+               mask |= X86_BR_IND_CALL;
+
+       if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+               mask |= X86_BR_ABORT;
+
+       if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+               mask |= X86_BR_IN_TX;
+
+       if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+               mask |= X86_BR_NO_TX;
+
+       if (br_type & PERF_SAMPLE_BRANCH_COND)
+               mask |= X86_BR_JCC;
+
+       if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+               if (!x86_pmu_has_lbr_callstack())
+                       return -EOPNOTSUPP;
+               if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
+                       return -EINVAL;
+               mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
+                       X86_BR_CALL_STACK;
+       }
+
+       if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+               mask |= X86_BR_IND_JMP;
+
+       if (br_type & PERF_SAMPLE_BRANCH_CALL)
+               mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+       /*
+        * Stash the actual user request into reg; it may
+        * be used by fixup code for some CPUs.
+        */
+       event->hw.branch_reg.reg = mask;
+       return 0;
+}
+
+/*
+ * Setup the HW LBR filter.
+ * Used only when available; it may not be enough to disambiguate
+ * all branches and may need the help of the SW filter.
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg;
+       u64 br_type = event->attr.branch_sample_type;
+       u64 mask = 0, v;
+       int i;
+
+       for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+               if (!(br_type & (1ULL << i)))
+                       continue;
+
+               v = x86_pmu.lbr_sel_map[i];
+               if (v == LBR_NOT_SUPP)
+                       return -EOPNOTSUPP;
+
+               if (v != LBR_IGN)
+                       mask |= v;
+       }
+
+       reg = &event->hw.branch_reg;
+       reg->idx = EXTRA_REG_LBR;
+
+       /*
+        * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
+        * in suppress mode. So LBR_SELECT should be set to
+        * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+        */
+       reg->config = mask ^ x86_pmu.lbr_sel_mask;
+
+       if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+           (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+           (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+               reg->config |= LBR_NO_INFO;
+
+       return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+       int ret = 0;
+
+       /*
+        * no LBR on this PMU
+        */
+       if (!x86_pmu.lbr_nr)
+               return -EOPNOTSUPP;
+
+       /*
+        * setup SW LBR filter
+        */
+       ret = intel_pmu_setup_sw_lbr_filter(event);
+       if (ret)
+               return ret;
+
+       /*
+        * setup HW LBR filter, if any
+        */
+       if (x86_pmu.lbr_sel_map)
+               ret = intel_pmu_setup_hw_lbr_filter(event);
+
+       return ret;
+}
+
+/*
+ * Return the type of control flow change at address "from".
+ * The instruction is not necessarily a branch (e.g. in the case of an interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to, int abort)
+{
+       struct insn insn;
+       void *addr;
+       int bytes_read, bytes_left;
+       int ret = X86_BR_NONE;
+       int ext, to_plm, from_plm;
+       u8 buf[MAX_INSN_SIZE];
+       int is64 = 0;
+
+       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+       from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+       /*
+        * May be zero if the LBR did not fill up after a reset by the time
+        * we get a PMU interrupt.
+        */
+       if (from == 0 || to == 0)
+               return X86_BR_NONE;
+
+       if (abort)
+               return X86_BR_ABORT | to_plm;
+
+       if (from_plm == X86_BR_USER) {
+               /*
+                * can happen if measuring at the user level only
+                * and we interrupt in a kernel thread, e.g., idle.
+                */
+               if (!current->mm)
+                       return X86_BR_NONE;
+
+               /* may fail if text not present */
+               bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+                                               MAX_INSN_SIZE);
+               bytes_read = MAX_INSN_SIZE - bytes_left;
+               if (!bytes_read)
+                       return X86_BR_NONE;
+
+               addr = buf;
+       } else {
+               /*
+                * The LBR logs any address in the IP, even if the IP just
+                * faulted. This means userspace can control the from address.
+        * Ensure we don't blindly read any address by validating it is
+                * a known text address.
+                */
+               if (kernel_text_address(from)) {
+                       addr = (void *)from;
+                       /*
+                        * Assume we can get the maximum possible size
+                        * when grabbing kernel data.  This is not
+                        * _strictly_ true since we could possibly be
+                        * executing up next to a memory hole, but
+                        * it is very unlikely to be a problem.
+                        */
+                       bytes_read = MAX_INSN_SIZE;
+               } else {
+                       return X86_BR_NONE;
+               }
+       }
+
+       /*
+        * decoder needs to know the ABI especially
+        * on 64-bit systems running 32-bit apps
+        */
+#ifdef CONFIG_X86_64
+       is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+       insn_init(&insn, addr, bytes_read, is64);
+       insn_get_opcode(&insn);
+       if (!insn.opcode.got)
+               return X86_BR_ABORT;
+
+       switch (insn.opcode.bytes[0]) {
+       case 0xf:
+               switch (insn.opcode.bytes[1]) {
+               case 0x05: /* syscall */
+               case 0x34: /* sysenter */
+                       ret = X86_BR_SYSCALL;
+                       break;
+               case 0x07: /* sysret */
+               case 0x35: /* sysexit */
+                       ret = X86_BR_SYSRET;
+                       break;
+               case 0x80 ... 0x8f: /* conditional */
+                       ret = X86_BR_JCC;
+                       break;
+               default:
+                       ret = X86_BR_NONE;
+               }
+               break;
+       case 0x70 ... 0x7f: /* conditional */
+               ret = X86_BR_JCC;
+               break;
+       case 0xc2: /* near ret */
+       case 0xc3: /* near ret */
+       case 0xca: /* far ret */
+       case 0xcb: /* far ret */
+               ret = X86_BR_RET;
+               break;
+       case 0xcf: /* iret */
+               ret = X86_BR_IRET;
+               break;
+       case 0xcc ... 0xce: /* int */
+               ret = X86_BR_INT;
+               break;
+       case 0xe8: /* call near rel */
+               insn_get_immediate(&insn);
+               if (insn.immediate1.value == 0) {
+                       /* zero length call */
+                       ret = X86_BR_ZERO_CALL;
+                       break;
+               }
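+               /* fall through: a non-zero displacement is an ordinary near call */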
+       case 0x9a: /* call far absolute */
+               ret = X86_BR_CALL;
+               break;
+       case 0xe0 ... 0xe3: /* loop jmp */
+               ret = X86_BR_JCC;
+               break;
+       case 0xe9 ... 0xeb: /* jmp */
+               ret = X86_BR_JMP;
+               break;
+       case 0xff: /* call near absolute, call far absolute ind */
+               insn_get_modrm(&insn);
+               ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+               switch (ext) {
+               case 2: /* near ind call */
+               case 3: /* far ind call */
+                       ret = X86_BR_IND_CALL;
+                       break;
+               case 4:
+               case 5:
+                       ret = X86_BR_IND_JMP;
+                       break;
+               }
+               break;
+       default:
+               ret = X86_BR_NONE;
+       }
+       /*
+        * Interrupts, traps and faults (and thus ring transitions) may
+        * occur on any instruction. Thus, to classify them correctly,
+        * we need to first look at the from and to priv levels. If they
+        * are different and the target is in the kernel, then it indicates
+        * a ring transition. If the from instruction is not a ring
+        * transition instruction (syscall, sysenter, int), then it means
+        * it was an irq, trap or fault.
+        *
+        * We have no way of detecting kernel to kernel faults.
+        */
+       if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+           && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+               ret = X86_BR_IRQ;
+
+       /*
+        * branch priv level determined by target as
+        * is done by HW when LBR_SELECT is implemented
+        */
+       if (ret != X86_BR_NONE)
+               ret |= to_plm;
+
+       return ret;
+}
+
+/*
+ * Implement the actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+       u64 from, to;
+       int br_sel = cpuc->br_sel;
+       int i, j, type;
+       bool compress = false;
+
+       /* if sampling all branches, then nothing to filter */
+       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+               return;
+
+       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+               from = cpuc->lbr_entries[i].from;
+               to = cpuc->lbr_entries[i].to;
+
+               type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+               if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+                       if (cpuc->lbr_entries[i].in_tx)
+                               type |= X86_BR_IN_TX;
+                       else
+                               type |= X86_BR_NO_TX;
+               }
+
+               /* if type does not correspond, then discard */
+               if (type == X86_BR_NONE || (br_sel & type) != type) {
+                       cpuc->lbr_entries[i].from = 0;
+                       compress = true;
+               }
+       }
+
+       if (!compress)
+               return;
+
+       /* remove all entries with from=0 */
+       for (i = 0; i < cpuc->lbr_stack.nr; ) {
+               if (!cpuc->lbr_entries[i].from) {
+                       j = i;
+                       while (++j < cpuc->lbr_stack.nr)
+                               cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+                       cpuc->lbr_stack.nr--;
+                       if (!cpuc->lbr_entries[i].from)
+                               continue;
+               }
+               i++;
+       }
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
+                                               | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+        */
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
+        LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+       /*
+        * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+        */
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+};
+
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
+       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
+       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
+       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
+       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_FAR,
+       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
+       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
+       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
+                                               | LBR_RETURN | LBR_CALL_STACK,
+       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
+       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
+};
+
+/* core */
+void __init intel_pmu_lbr_init_core(void)
+{
+       x86_pmu.lbr_nr     = 4;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("4-deep LBR, ");
+}
+
+/* nehalem/westmere */
+void __init intel_pmu_lbr_init_nhm(void)
+{
+       x86_pmu.lbr_nr     = 16;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - workaround LBR_SEL errata (see above)
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR, but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void __init intel_pmu_lbr_init_snb(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR, but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("16-deep LBR, ");
+}
+
+/* haswell */
+void intel_pmu_lbr_init_hsw(void)
+{
+       x86_pmu.lbr_nr   = 16;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+       pr_cont("16-deep LBR, ");
+}
+
+/* skylake */
+__init void intel_pmu_lbr_init_skl(void)
+{
+       x86_pmu.lbr_nr   = 32;
+       x86_pmu.lbr_tos  = MSR_LBR_TOS;
+       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+       /*
+        * SW branch filter usage:
+        * - support syscall, sysret capture.
+        *   That requires LBR_FAR, but that means far
+        *   jmps need to be filtered out
+        */
+       pr_cont("32-deep LBR, ");
+}
+
+/* atom */
+void __init intel_pmu_lbr_init_atom(void)
+{
+       /*
+        * Only models starting at stepping 10 seem
+        * to have an operational LBR which can freeze
+        * on a PMU interrupt.
+        */
+       if (boot_cpu_data.x86_model == 28
+           && boot_cpu_data.x86_mask < 10) {
+               pr_cont("LBR disabled due to erratum");
+               return;
+       }
+
+       x86_pmu.lbr_nr     = 8;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+       /*
+        * SW branch filter usage:
+        * - compensate for lack of HW filter
+        */
+       pr_cont("8-deep LBR, ");
+}
+
+/* Knights Landing */
+void intel_pmu_lbr_init_knl(void)
+{
+       x86_pmu.lbr_nr     = 8;
+       x86_pmu.lbr_tos    = MSR_LBR_TOS;
+       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+       pr_cont("8-deep LBR, ");
+}
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
new file mode 100644 (file)
index 0000000..0a5ede1
--- /dev/null
@@ -0,0 +1,1376 @@
+/*
+ * Netburst Performance Events (P4, old Xeon)
+ *
+ *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
+ *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/perf_event_p4.h>
+#include <asm/hardirq.h>
+#include <asm/apic.h>
+
+#include "../perf_event.h"
+
+#define P4_CNTR_LIMIT 3
+/*
+ * array indices: 0,1 - HT threads, used with HT-enabled CPUs
+ */
+struct p4_event_bind {
+       unsigned int opcode;                    /* Event code and ESCR selector */
+       unsigned int escr_msr[2];               /* ESCR MSR for this event */
+       unsigned int escr_emask;                /* valid ESCR EventMask bits */
+       unsigned int shared;                    /* event is shared across threads */
+       char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on absence */
+};
+
+struct p4_pebs_bind {
+       unsigned int metric_pebs;
+       unsigned int metric_vert;
+};
+
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
+       [P4_PEBS_METRIC__##name] = {                            \
+               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
+               .metric_vert = vert,                            \
+       }
+
+/*
+ * Note that we have P4_PEBS_ENABLE_UOP_TAG always set here.
+ *
+ * It's needed for mapping the P4_PEBS_CONFIG_METRIC_MASK bits of
+ * the event configuration to find out which values are to be
+ * written into the MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers.
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
+       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
+       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
+       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
+       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
+       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
+       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
+};
+
+/*
+ * Note that we don't use CCCR1 here; there is an
+ * exception for P4_BSQ_ALLOCATION but we just have
+ * no workaround.
+ *
+ * Consider this binding as the resources which a particular
+ * event may borrow; it doesn't contain EventMask,
+ * Tags and friends -- they are left to the caller.
+ */
+static struct p4_event_bind p4_event_bind_map[] = {
+       [P4_EVENT_TC_DELIVER_MODE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
+               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
+               .shared         = 1,
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_BPU_FETCH_REQUEST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
+               .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_ITLB_REFERENCE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
+               .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_MEMORY_CANCEL] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
+               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_MEMORY_COMPLETE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_LOAD_PORT_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_STORE_PORT_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
+               .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_MOB_LOAD_REPLAY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
+               .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_PAGE_WALK_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
+               .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
+               .shared         = 1,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BSQ_CACHE_REFERENCE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
+               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_IOQ_ALLOCATION] = {
+               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
+               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
+               .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
+               .cntr           = { {2, -1, -1}, {3, -1, -1} },
+       },
+       [P4_EVENT_FSB_DATA_ACTIVITY] = {
+               .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
+               .shared         = 1,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
+               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
+               .cntr           = { {0, -1, -1}, {1, -1, -1} },
+       },
+       [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
+               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
+               .escr_msr       = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
+               .cntr           = { {2, -1, -1}, {3, -1, -1} },
+       },
+       [P4_EVENT_SSE_INPUT_ASSIST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_PACKED_SP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_PACKED_DP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_SCALAR_SP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_SCALAR_DP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_64BIT_MMX_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_128BIT_MMX_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_X87_FP_UOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
+               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_TC_MISC] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
+               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_GLOBAL_POWER_EVENTS] = {
+               .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_TC_MS_XFER] = {
+               .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
+               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_UOP_QUEUE_WRITES] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
+               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
+               .escr_msr       = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RETIRED_BRANCH_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
+               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
+               .cntr           = { {4, 5, -1}, {6, 7, -1} },
+       },
+       [P4_EVENT_RESOURCE_STALL] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
+               .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_WC_BUFFER] = {
+               .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
+               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
+               .shared         = 1,
+               .cntr           = { {8, 9, -1}, {10, 11, -1} },
+       },
+       [P4_EVENT_B2B_CYCLES] = {
+               .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_BNR] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BNR),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_SNOOP] = {
+               .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_RESPONSE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
+               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
+               .escr_emask     = 0,
+               .cntr           = { {0, -1, -1}, {2, -1, -1} },
+       },
+       [P4_EVENT_FRONT_END_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_EXECUTION_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_REPLAY_EVENT] = {
+               .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_INSTR_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_UOPS_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_UOP_TYPE] = {
+               .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
+               .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_BRANCH_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_X87_ASSIST] = {
+               .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_MACHINE_CLEAR] = {
+               .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
+               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+       [P4_EVENT_INSTR_COMPLETED] = {
+               .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
+               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
+               .escr_emask     =
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
+                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
+               .cntr           = { {12, 13, 16}, {14, 15, 17} },
+       },
+};
+
+#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
+       p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
+                           P4_ESCR_EMASK_BIT(event, bit))              | \
+       p4_config_pack_cccr(metric                                      | \
+                           P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
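For orientation, the macro above simply packs an ESCR event selection and a PEBS metric into one 64-bit config word; a minimal sketch of what one table entry below expands to (the helper names are taken from this file, the expansion itself is only illustrative):

	/* illustrative expansion of P4_GEN_CACHE_EVENT for the L1D read-miss entry */
	u64 l1d_read_miss =
		p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_REPLAY_EVENT) |
				    P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)) |
		p4_config_pack_cccr(P4_PEBS_METRIC__1stl_cache_load_miss_retired |
				    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(P4_EVENT_REPLAY_EVENT))));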
+
+static __initconst const u64 p4_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
+       },
+},
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0,
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
+                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
+                                               P4_PEBS_METRIC__none),
+               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
+                                               P4_PEBS_METRIC__none),
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(NODE) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+/*
+ * Because Netburst is quite restricted in how many identical events
+ * may run simultaneously, we introduce event aliases, i.e. different
+ * events which have the same functionality but use non-intersecting
+ * resources (ESCR/CCCR/counter registers).
+ *
+ * This allows us to relax the restrictions a bit and run two or more
+ * identical events together.
+ *
+ * Never set any custom internal bits such as P4_CONFIG_HT,
+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are either
+ * kept up to date automatically or not applicable at all.
+ */
+struct p4_event_alias {
+       u64 original;
+       u64 alternative;
+} p4_event_aliases[] = {
+       {
+               /*
+                * Non-halted cycles can be substituted with non-sleeping cycles (see
+                * Intel SDM Vol3b for details). We need this alias to be able
+                * to run nmi-watchdog and 'perf top' (or any other user space tool
+                * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
+                * simultaneously.
+                */
+       .original       =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+       .alternative    =
+               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)             |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
+               p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT          |
+                                   P4_CCCR_COMPARE),
+       },
+};
+
+static u64 p4_get_alias_event(u64 config)
+{
+       u64 config_match;
+       int i;
+
+       /*
+        * Only an event carrying the special mark is allowed;
+        * this makes sure it did not come in as a malformed
+        * RAW event.
+        */
+       if (!(config & P4_CONFIG_ALIASABLE))
+               return 0;
+
+       config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
+
+       for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
+               if (config_match == p4_event_aliases[i].original) {
+                       config_match = p4_event_aliases[i].alternative;
+                       break;
+               } else if (config_match == p4_event_aliases[i].alternative) {
+                       config_match = p4_event_aliases[i].original;
+                       break;
+               }
+       }
+
+       if (i >= ARRAY_SIZE(p4_event_aliases))
+               return 0;
+
+       return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
+}
+
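A minimal usage sketch of the helper above (it mirrors how p4_pmu_schedule_events() uses it further down; the wrapper function itself is hypothetical):

	/* hypothetical caller: retry scheduling with the aliased ESCR/CCCR */
	static int p4_try_alias(struct hw_perf_event *hwc)
	{
		u64 alt = p4_get_alias_event(hwc->config);

		if (!alt)
			return -EAGAIN;	/* no alias registered for this config */

		hwc->config = alt;	/* bits in P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS are carried over */
		return 0;
	}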
+static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
+  /* non-halted CPU clocks */
+  [PERF_COUNT_HW_CPU_CYCLES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))       |
+               P4_CONFIG_ALIASABLE,
+
+  /*
+   * retired instructions
+   * for the sake of simplicity we don't use FSB tagging
+   */
+  [PERF_COUNT_HW_INSTRUCTIONS] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)               |
+               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
+
+  /* cache hits */
+  [PERF_COUNT_HW_CACHE_REFERENCES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
+
+  /* cache misses */
+  [PERF_COUNT_HW_CACHE_MISSES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
+               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
+
+  /* branch instructions retired */
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
+
+  /* mispredicted branches retired */
+  [PERF_COUNT_HW_BRANCH_MISSES]        =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)      |
+               P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
+
+  /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN):  */
+  [PERF_COUNT_HW_BUS_CYCLES] =
+       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)           |
+               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
+               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))        |
+       p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
+};
+
+static struct p4_event_bind *p4_config_get_bind(u64 config)
+{
+       unsigned int evnt = p4_config_unpack_event(config);
+       struct p4_event_bind *bind = NULL;
+
+       if (evnt < ARRAY_SIZE(p4_event_bind_map))
+               bind = &p4_event_bind_map[evnt];
+
+       return bind;
+}
+
+static u64 p4_pmu_event_map(int hw_event)
+{
+       struct p4_event_bind *bind;
+       unsigned int esel;
+       u64 config;
+
+       config = p4_general_events[hw_event];
+       bind = p4_config_get_bind(config);
+       esel = P4_OPCODE_ESEL(bind->opcode);
+       config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+
+       return config;
+}
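As a quick walk-through of the mapping above for one generic event (the identifiers come from the tables earlier in this file; the local variables are illustrative):

	/* illustrative only: what p4_pmu_event_map(PERF_COUNT_HW_CPU_CYCLES) computes */
	u64 cycles = p4_general_events[PERF_COUNT_HW_CPU_CYCLES];
	struct p4_event_bind *b = p4_config_get_bind(cycles);	/* GLOBAL_POWER_EVENTS binding */
	cycles |= p4_config_pack_cccr(P4_CCCR_ESEL(P4_OPCODE_ESEL(b->opcode)));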
+
+/* check cpu model specifics */
+static bool p4_event_match_cpu_model(unsigned int event_idx)
+{
+       /* the INSTR_COMPLETED event only exists for models 3, 4 and 6 (Prescott) */
+       if (event_idx == P4_EVENT_INSTR_COMPLETED) {
+               if (boot_cpu_data.x86_model != 3 &&
+                       boot_cpu_data.x86_model != 4 &&
+                       boot_cpu_data.x86_model != 6)
+                       return false;
+       }
+
+       /*
+        * For information:
+        * - IQ_ESCR0 and IQ_ESCR1 are available only on models 1 and 2
+        */
+
+       return true;
+}
+
+static int p4_validate_raw_event(struct perf_event *event)
+{
+       unsigned int v, emask;
+
+       /* User data may have out-of-bound event index */
+       v = p4_config_unpack_event(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_event_bind_map))
+               return -EINVAL;
+
+       /* It may be unsupported: */
+       if (!p4_event_match_cpu_model(v))
+               return -EINVAL;
+
+       /*
+        * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as in
+        * Architectural Performance Monitoring: it selects not on _which_
+        * logical cpu to count but rather _when_ to count, i.e. it depends
+        * on the logical cpu state -- count the event if one cpu is active,
+        * none, both or any.  So we just allow the user to pass any value
+        * desired.
+        *
+        * In turn we always set the Tx_OS/Tx_USR bits bound to the local
+        * logical cpu without propagating them to the other cpu.
+        */
+
+       /*
+        * if an event is shared across the logical threads
+        * the user needs special permissions to be able to use it
+        */
+       if (p4_ht_active() && p4_event_bind_map[v].shared) {
+               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+                       return -EACCES;
+       }
+
+       /* ESCR EventMask bits may be invalid */
+       emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
+       if (emask & ~p4_event_bind_map[v].escr_emask)
+               return -EINVAL;
+
+       /*
+        * it may have some invalid PEBS bits
+        */
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
+               return -EINVAL;
+
+       v = p4_config_unpack_metric(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int p4_hw_config(struct perf_event *event)
+{
+       int cpu = get_cpu();
+       int rc = 0;
+       u32 escr, cccr;
+
+       /*
+        * the reason we grab the cpu this early is that, if we get scheduled
+        * for the first time on the same cpu, we will not need to swap the
+        * thread-specific flags in the config (and will save some cpu cycles)
+        */
+
+       cccr = p4_default_cccr_conf(cpu);
+       escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
+                                        event->attr.exclude_user);
+       event->hw.config = p4_config_pack_escr(escr) |
+                          p4_config_pack_cccr(cccr);
+
+       if (p4_ht_active() && p4_ht_thread(cpu))
+               event->hw.config = p4_set_ht_bit(event->hw.config);
+
+       if (event->attr.type == PERF_TYPE_RAW) {
+               struct p4_event_bind *bind;
+               unsigned int esel;
+               /*
+                * Clear the bits we reserve to be managed by the kernel
+                * itself and never allow them to be set from user space
+                */
+                event->attr.config &= P4_CONFIG_MASK;
+
+               rc = p4_validate_raw_event(event);
+               if (rc)
+                       goto out;
+
+               /*
+                * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+                * bits since we keep additional info there (for cache events etc.)
+                */
+               event->hw.config |= event->attr.config;
+               bind = p4_config_get_bind(event->attr.config);
+               if (!bind) {
+                       rc = -EINVAL;
+                       goto out;
+               }
+               esel = P4_OPCODE_ESEL(bind->opcode);
+               event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
+       }
+
+       rc = x86_setup_perfctr(event);
+out:
+       put_cpu();
+       return rc;
+}
+
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+{
+       u64 v;
+
+       /* an official way for overflow indication */
+       rdmsrl(hwc->config_base, v);
+       if (v & P4_CCCR_OVF) {
+               wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
+               return 1;
+       }
+
+       /*
+        * In some circumstances the overflow might issue an NMI without
+        * setting the P4_CCCR_OVF bit.  Because a counter holds a negative
+        * value we simply check whether the high bit is set; if it is
+        * cleared, the counter has crossed zero and continued counting
+        * before the real NMI signal was received:
+        */
+       rdmsrl(hwc->event_base, v);
+       if (!(v & ARCH_P4_UNFLAGGED_BIT))
+               return 1;
+
+       return 0;
+}
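A worked example of the sign-bit test above, as a sketch (it assumes the usual 40-bit P4 counters, i.e. ARCH_P4_CNTRVAL_BITS == 40; the variable names are illustrative):

	/* a sampling period of 1000 is programmed as its negative, truncated to counter width */
	u64 period     = 1000;
	u64 programmed = (u64)(-(s64)period) & ARCH_P4_CNTRVAL_MASK;	/* 0xFFFFFFFC18, high bit set */
	bool crossed   = !(programmed & ARCH_P4_UNFLAGGED_BIT);		/* false here; true once the counter passes zero */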
+
+static void p4_pmu_disable_pebs(void)
+{
+       /*
+        * FIXME
+        *
+        * Two threads are still allowed to set up the same cache
+        * events, so we can't simply clear the metrics until we know
+        * no one is depending on us; we would need some kind of
+        * reference counter for "ReplayEvent" users.
+        *
+        * What is more complex are RAW events: if the user (for some
+        * reason) passes a cache event metric with an improper event
+        * opcode, it's fine from the hardware point of view but makes
+        * no sense in terms of the "meaning" of such an action.
+        *
+        * So for the moment leave the metrics turned on forever -- it's
+        * ok for now but needs to be revisited!
+        *
+        * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
+        * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
+        */
+}
+
+static inline void p4_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /*
+        * If the event gets disabled while the counter is in an overflowed
+        * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets
+        * asserted again and again
+        */
+       (void)wrmsrl_safe(hwc->config_base,
+               p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
+}
+
+static void p4_pmu_disable_all(void)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct perf_event *event = cpuc->events[idx];
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               p4_pmu_disable_event(event);
+       }
+
+       p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+       struct p4_pebs_bind *bind;
+       unsigned int idx;
+
+       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+       idx = p4_config_unpack_metric(config);
+       if (idx == P4_PEBS_METRIC__none)
+               return;
+
+       bind = &p4_pebs_bind_map[idx];
+
+       (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+       (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,      (u64)bind->metric_vert);
+}
+
+static void p4_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       int thread = p4_ht_config_thread(hwc->config);
+       u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
+       unsigned int idx = p4_config_unpack_event(hwc->config);
+       struct p4_event_bind *bind;
+       u64 escr_addr, cccr;
+
+       bind = &p4_event_bind_map[idx];
+       escr_addr = bind->escr_msr[thread];
+
+       /*
+        * - we don't support cascaded counters yet
+        * - and counter 1 is broken (erratum)
+        */
+       WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
+       WARN_ON_ONCE(hwc->idx == 1);
+
+       /* we need a real Event value */
+       escr_conf &= ~P4_ESCR_EVENT_MASK;
+       escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
+
+       cccr = p4_config_unpack_cccr(hwc->config);
+
+       /*
+        * it could be a cache event, so we may need to write the
+        * metrics into additional MSRs
+        */
+       p4_pmu_enable_pebs(hwc->config);
+
+       (void)wrmsrl_safe(escr_addr, escr_conf);
+       (void)wrmsrl_safe(hwc->config_base,
+                               (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
+}
+
+static void p4_pmu_enable_all(int added)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int idx;
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               struct perf_event *event = cpuc->events[idx];
+               if (!test_bit(idx, cpuc->active_mask))
+                       continue;
+               p4_pmu_enable_event(event);
+       }
+}
+
+static int p4_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct perf_sample_data data;
+       struct cpu_hw_events *cpuc;
+       struct perf_event *event;
+       struct hw_perf_event *hwc;
+       int idx, handled = 0;
+       u64 val;
+
+       cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+               int overflow;
+
+               if (!test_bit(idx, cpuc->active_mask)) {
+                       /* catch in-flight IRQs */
+                       if (__test_and_clear_bit(idx, cpuc->running))
+                               handled++;
+                       continue;
+               }
+
+               event = cpuc->events[idx];
+               hwc = &event->hw;
+
+               WARN_ON_ONCE(hwc->idx != idx);
+
+               /* it might be an unflagged overflow */
+               overflow = p4_pmu_clear_cccr_ovf(hwc);
+
+               val = x86_perf_event_update(event);
+               if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
+                       continue;
+
+               handled += overflow;
+
+               /* event overflow for sure */
+               perf_sample_data_init(&data, 0, hwc->last_period);
+
+               if (!x86_perf_event_set_period(event))
+                       continue;
+
+
+               if (perf_event_overflow(event, &data, regs))
+                       x86_pmu_stop(event, 0);
+       }
+
+       if (handled)
+               inc_irq_stat(apic_perf_irqs);
+
+       /*
+        * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+        * been observed that the OVF bit flag has to be cleared first _before_
+        * the LVTPC can be unmasked.
+        *
+        * The reason is the NMI line will continue to be asserted while the OVF
+        * bit is set.  This causes a second NMI to be generated if the LVTPC
+        * is unmasked before the OVF bit is cleared, leading to unknown NMI
+        * messages.
+        */
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+       return handled;
+}
+
+/*
+ * swap thread specific fields according to a thread
+ * we are going to run on
+ */
+static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+       u32 escr, cccr;
+
+       /*
+        * either we are lucky and continue on the same cpu, or there is no HT support
+        */
+       if (!p4_should_swap_ts(hwc->config, cpu))
+               return;
+
+       /*
+        * the event has migrated from another logical
+        * cpu, so we need to swap the thread-specific flags
+        */
+
+       escr = p4_config_unpack_escr(hwc->config);
+       cccr = p4_config_unpack_cccr(hwc->config);
+
+       if (p4_ht_thread(cpu)) {
+               cccr &= ~P4_CCCR_OVF_PMI_T0;
+               cccr |= P4_CCCR_OVF_PMI_T1;
+               if (escr & P4_ESCR_T0_OS) {
+                       escr &= ~P4_ESCR_T0_OS;
+                       escr |= P4_ESCR_T1_OS;
+               }
+               if (escr & P4_ESCR_T0_USR) {
+                       escr &= ~P4_ESCR_T0_USR;
+                       escr |= P4_ESCR_T1_USR;
+               }
+               hwc->config  = p4_config_pack_escr(escr);
+               hwc->config |= p4_config_pack_cccr(cccr);
+               hwc->config |= P4_CONFIG_HT;
+       } else {
+               cccr &= ~P4_CCCR_OVF_PMI_T1;
+               cccr |= P4_CCCR_OVF_PMI_T0;
+               if (escr & P4_ESCR_T1_OS) {
+                       escr &= ~P4_ESCR_T1_OS;
+                       escr |= P4_ESCR_T0_OS;
+               }
+               if (escr & P4_ESCR_T1_USR) {
+                       escr &= ~P4_ESCR_T1_USR;
+                       escr |= P4_ESCR_T0_USR;
+               }
+               hwc->config  = p4_config_pack_escr(escr);
+               hwc->config |= p4_config_pack_cccr(cccr);
+               hwc->config &= ~P4_CONFIG_HT;
+       }
+}
+
+/*
+ * ESCR address hashing is tricky: ESCRs are not sequential in memory,
+ * but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and the low byte
+ * of any ESCR address lies in the range [0xa0,0xe1],
+ *
+ * so we end up with a hash table that is ~70% filled
+ */
+
+#define P4_ESCR_MSR_BASE               0x000003a0
+#define P4_ESCR_MSR_MAX                        0x000003e1
+#define P4_ESCR_MSR_TABLE_SIZE         (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
+#define P4_ESCR_MSR_IDX(msr)           (msr - P4_ESCR_MSR_BASE)
+#define P4_ESCR_MSR_TABLE_ENTRY(msr)   [P4_ESCR_MSR_IDX(msr)] = msr
+
+static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
+       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
+};
+
+static int p4_get_escr_idx(unsigned int addr)
+{
+       unsigned int idx = P4_ESCR_MSR_IDX(addr);
+
+       if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE      ||
+                       !p4_escr_table[idx]             ||
+                       p4_escr_table[idx] != addr)) {
+               WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
+               return -1;
+       }
+
+       return idx;
+}
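For illustration, the "hash" is just an offset from the base MSR; a minimal sketch (assuming MSR_P4_BSU_ESCR1 is the MSR immediately above the 0x3a0 base):

	/* illustrative lookup */
	int idx = p4_get_escr_idx(MSR_P4_BSU_ESCR1);	/* P4_ESCR_MSR_IDX(0x3a1) == 1 */
	BUG_ON(idx < 0 || p4_escr_table[idx] != MSR_P4_BSU_ESCR1);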
+
+static int p4_next_cntr(int thread, unsigned long *used_mask,
+                       struct p4_event_bind *bind)
+{
+       int i, j;
+
+       for (i = 0; i < P4_CNTR_LIMIT; i++) {
+               j = bind->cntr[thread][i];
+               if (j != -1 && !test_bit(j, used_mask))
+                       return j;
+       }
+
+       return -1;
+}
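A small sketch of the counter walk above, using the TC_MISC binding from the table earlier in this file (the bitmap setup is illustrative):

	/* thread 0 of TC_MISC may only use counters 4 or 5 (see its .cntr entry above) */
	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)] = { 0 };

	__set_bit(4, used);
	/* p4_next_cntr(0, used, &p4_event_bind_map[P4_EVENT_TC_MISC]) would return 5 */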
+
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
+       int cpu = smp_processor_id();
+       struct hw_perf_event *hwc;
+       struct p4_event_bind *bind;
+       unsigned int i, thread, num;
+       int cntr_idx, escr_idx;
+       u64 config_alias;
+       int pass;
+
+       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+       bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
+
+       for (i = 0, num = n; i < n; i++, num--) {
+
+               hwc = &cpuc->event_list[i]->hw;
+               thread = p4_ht_thread(cpu);
+               pass = 0;
+
+again:
+               /*
+                * It's possible to hit a circular lock
+                * between original and alternative events
+                * if both are scheduled already.
+                */
+               if (pass > 2)
+                       goto done;
+
+               bind = p4_config_get_bind(hwc->config);
+               escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
+               if (unlikely(escr_idx == -1))
+                       goto done;
+
+               if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
+                       cntr_idx = hwc->idx;
+                       if (assign)
+                               assign[i] = hwc->idx;
+                       goto reserve;
+               }
+
+               cntr_idx = p4_next_cntr(thread, used_mask, bind);
+               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
+                       /*
+                        * Check whether an event alias is still available.
+                        */
+                       config_alias = p4_get_alias_event(hwc->config);
+                       if (!config_alias)
+                               goto done;
+                       hwc->config = config_alias;
+                       pass++;
+                       goto again;
+               }
+               /*
+                * Perf does test runs to see if a whole group can be assigned
+                * together successfully.  There can be multiple rounds of this.
+                * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
+                * bits, such that the next round of group assignments will
+                * cause the above p4_should_swap_ts to pass instead of fail.
+                * This leads to counters exclusive to thread0 being used by
+                * thread1.
+                *
+                * Solve this with a cheap hack, reset the idx back to -1 to
+                * force a new lookup (p4_next_cntr) to get the right counter
+                * for the right thread.
+                *
+                * This probably doesn't comply with the general spirit of how
+                * perf wants to work, but P4 is special. :-(
+                */
+               if (p4_should_swap_ts(hwc->config, cpu))
+                       hwc->idx = -1;
+               p4_pmu_swap_config_ts(hwc, cpu);
+               if (assign)
+                       assign[i] = cntr_idx;
+reserve:
+               set_bit(cntr_idx, used_mask);
+               set_bit(escr_idx, escr_mask);
+       }
+
+done:
+       return num ? -EINVAL : 0;
+}
+
+PMU_FORMAT_ATTR(cccr, "config:0-31" );
+PMU_FORMAT_ATTR(escr, "config:32-62");
+PMU_FORMAT_ATTR(ht,   "config:63"   );
+
+static struct attribute *intel_p4_formats_attr[] = {
+       &format_attr_cccr.attr,
+       &format_attr_escr.attr,
+       &format_attr_ht.attr,
+       NULL,
+};
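For reference, the raw config layout these format attributes describe (taken directly from the bit ranges above):

	/*
	 *   63   62 .......... 32   31 ............ 0
	 *  +----+-----------------+-------------------+
	 *  | ht |      escr       |       cccr        |
	 *  +----+-----------------+-------------------+
	 */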
+
+static __initconst const struct x86_pmu p4_pmu = {
+       .name                   = "Netburst P4/Xeon",
+       .handle_irq             = p4_pmu_handle_irq,
+       .disable_all            = p4_pmu_disable_all,
+       .enable_all             = p4_pmu_enable_all,
+       .enable                 = p4_pmu_enable_event,
+       .disable                = p4_pmu_disable_event,
+       .eventsel               = MSR_P4_BPU_CCCR0,
+       .perfctr                = MSR_P4_BPU_PERFCTR0,
+       .event_map              = p4_pmu_event_map,
+       .max_events             = ARRAY_SIZE(p4_general_events),
+       .get_event_constraints  = x86_get_event_constraints,
+       /*
+        * If HT is disabled we may need to use all
+        * ARCH_P4_MAX_CCCR counters simultaneously,
+        * though for the moment leave it restricted
+        * assuming HT is on
+        */
+       .num_counters           = ARCH_P4_MAX_CCCR,
+       .apic                   = 1,
+       .cntval_bits            = ARCH_P4_CNTRVAL_BITS,
+       .cntval_mask            = ARCH_P4_CNTRVAL_MASK,
+       .max_period             = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
+       .hw_config              = p4_hw_config,
+       .schedule_events        = p4_pmu_schedule_events,
+       /*
+        * This handles erratum N15 in Intel doc 249199-029:
+        * the counter may not be updated correctly on a write,
+        * so we need a second write operation to do the trick
+        * (the official workaround didn't work)
+        *
+        * the idea is taken from the OProfile code
+        */
+       .perfctr_second_write   = 1,
+
+       .format_attrs           = intel_p4_formats_attr,
+};
+
+__init int p4_pmu_init(void)
+{
+       unsigned int low, high;
+       int i, reg;
+
+       /* If we get stripped -- indexing fails */
+       BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
+
+       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+       if (!(low & (1 << 7))) {
+               pr_cont("unsupported Netburst CPU model %d ",
+                       boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+
+       memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+
+               .escr_msr       = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
+
+       x86_pmu = p4_pmu;
+
+       /*
+        * Even though the counters are configured to interrupt a particular
+        * logical processor when an overflow happens, testing has shown that
+        * on kdump kernels (which use a single cpu), thread1's counter
+        * continues to run and will report an NMI on thread0.  Due to the
+        * overflow bug, this leads to a stream of unknown NMIs.
+        *
+        * Solve this by zeroing out the registers to mimic a reset.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu_config_addr(i);
+               wrmsrl_safe(reg, 0ULL);
+       }
+
+       return 0;
+}
diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c
new file mode 100644 (file)
index 0000000..1f5c47a
--- /dev/null
@@ -0,0 +1,279 @@
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf_event.h"
+
+/*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]           = 0x0079,       /* CPU_CLK_UNHALTED */
+  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x00c0,       /* INST_RETIRED     */
+  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0f2e,       /* L2_RQSTS:M:E:S:I */
+  [PERF_COUNT_HW_CACHE_MISSES]         = 0x012e,       /* L2_RQSTS:I       */
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,       /* BR_INST_RETIRED  */
+  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,       /* BR_MISS_PRED_RETIRED */
+  [PERF_COUNT_HW_BUS_CYCLES]           = 0x0062,       /* BUS_DRDY_CLOCKS  */
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2,    /* RESOURCE_STALLS  */
+
+};
+
+static const u64 __initconst p6_hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS       */
+                [ C(RESULT_MISS)   ] = 0x0045, /* DCU_LINES_IN        */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0f29,  /* L2_LD:M:E:S:I       */
+       },
+        [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+        },
+ },
+ [ C(L1I ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
+               [ C(RESULT_MISS)   ] = 0x0f28,  /* L2_IFETCH:M:E:S:I  */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0x0025,  /* L2_M_LINES_INM     */
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(DTLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS      */
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = 0,
+               [ C(RESULT_MISS)   ] = 0,
+       },
+ },
+ [ C(ITLB) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
+               [ C(RESULT_MISS)   ] = 0x0085,  /* ITLB_MISS          */
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+ [ C(BPU ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = 0x00c4,  /* BR_INST_RETIRED      */
+               [ C(RESULT_MISS)   ] = 0x00c5,  /* BR_MISS_PRED_RETIRED */
+        },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = -1,
+               [ C(RESULT_MISS)   ] = -1,
+       },
+ },
+};
+
+static u64 p6_pmu_event_map(int hw_event)
+{
+       return p6_perfmon_event_map[hw_event];
+}
+
+/*
+ * Event setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_EVENT                   0x0000002EULL
+
+static struct event_constraint p6_event_constraints[] =
+{
+       INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
+       INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
+       INTEL_EVENT_CONSTRAINT(0x11, 0x2),      /* FP_ASSIST */
+       INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
+       INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
+       INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
+       EVENT_CONSTRAINT_END
+};
+
+static void p6_pmu_disable_all(void)
+{
+       u64 val;
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static void p6_pmu_enable_all(int added)
+{
+       unsigned long val;
+
+       /* p6 only has one enable register */
+       rdmsrl(MSR_P6_EVNTSEL0, val);
+       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+       wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
+static inline void
+p6_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val = P6_NOP_EVENT;
+
+       (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+static void p6_pmu_enable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 val;
+
+       val = hwc->config;
+
+       /*
+        * p6 only has a global event enable, set on PerfEvtSel0
+        * We "disable" events by programming P6_NOP_EVENT
+        * and we rely on p6_pmu_enable_all() being called
+        * to actually enable the events.
+        */
+
+       (void)wrmsrl_safe(hwc->config_base, val);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7"    );
+PMU_FORMAT_ATTR(umask, "config:8-15"   );
+PMU_FORMAT_ATTR(edge,  "config:18"     );
+PMU_FORMAT_ATTR(pc,    "config:19"     );
+PMU_FORMAT_ATTR(inv,   "config:23"     );
+PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+
+static struct attribute *intel_p6_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_pc.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask.attr,
+       NULL,
+};
+
+static __initconst const struct x86_pmu p6_pmu = {
+       .name                   = "p6",
+       .handle_irq             = x86_pmu_handle_irq,
+       .disable_all            = p6_pmu_disable_all,
+       .enable_all             = p6_pmu_enable_all,
+       .enable                 = p6_pmu_enable_event,
+       .disable                = p6_pmu_disable_event,
+       .hw_config              = x86_pmu_hw_config,
+       .schedule_events        = x86_schedule_events,
+       .eventsel               = MSR_P6_EVNTSEL0,
+       .perfctr                = MSR_P6_PERFCTR0,
+       .event_map              = p6_pmu_event_map,
+       .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
+       .apic                   = 1,
+       .max_period             = (1ULL << 31) - 1,
+       .version                = 0,
+       .num_counters           = 2,
+       /*
+        * Counters have 40 bits implemented. However, they are designed such
+        * that bits [32-39] are sign extensions of bit 31. As such, the
+        * effective width of a counter on P6-like PMUs is 32 bits only.
+        *
+        * See the IA-32 Intel Architecture Software Developer's Manual, Vol 3B.
+        */
+       .cntval_bits            = 32,
+       .cntval_mask            = (1ULL << 32) - 1,
+       .get_event_constraints  = x86_get_event_constraints,
+       .event_constraints      = p6_event_constraints,
+
+       .format_attrs           = intel_p6_formats_attr,
+       .events_sysfs_show      = intel_event_sysfs_show,
+
+};
+
+static __init void p6_pmu_rdpmc_quirk(void)
+{
+       if (boot_cpu_data.x86_mask < 9) {
+               /*
+                * PPro erratum 26; fixed in stepping 9 and above.
+                */
+               pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
+               x86_pmu.attr_rdpmc_broken = 1;
+               x86_pmu.attr_rdpmc = 0;
+       }
+}
+
+__init int p6_pmu_init(void)
+{
+       x86_pmu = p6_pmu;
+
+       switch (boot_cpu_data.x86_model) {
+       case  1: /* Pentium Pro */
+               x86_add_quirk(p6_pmu_rdpmc_quirk);
+               break;
+
+       case  3: /* Pentium II - Klamath */
+       case  5: /* Pentium II - Deschutes */
+       case  6: /* Pentium II - Mendocino */
+               break;
+
+       case  7: /* Pentium III - Katmai */
+       case  8: /* Pentium III - Coppermine */
+       case 10: /* Pentium III Xeon */
+       case 11: /* Pentium III - Tualatin */
+               break;
+
+       case  9: /* Pentium M - Banias */
+       case 13: /* Pentium M - Dothan */
+               break;
+
+       default:
+               pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+
+       memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+               sizeof(hw_cache_event_ids));
+
+       return 0;
+}
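
For context on the 32-bit effective counter width noted in the p6_pmu definition above: the generic x86 perf update path (and rapl_event_update() later in this same diff) recovers wrap-safe deltas by shifting both counter readings up by 64 - cntval_bits and back down with an arithmetic shift. Below is a minimal, stand-alone sketch of that trick with made-up counter values; it is an editorial illustration, not part of the patch.

/*
 * Stand-alone illustration of the shift-based wrap-safe delta for a
 * counter with an effective width of 32 bits (cntval_bits = 32).
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int shift = 64 - 32;			/* 64 - cntval_bits */
	uint64_t prev = 0xfffffff0ULL;		/* reading just before the 32-bit wrap */
	uint64_t now  = 0x00000010ULL;		/* reading after wrapping */
	int64_t delta = (int64_t)((now << shift) - (prev << shift));

	delta >>= shift;			/* arithmetic shift restores the sign */

	printf("delta = %lld\n", (long long)delta);	/* prints 32 */
	return 0;
}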
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
new file mode 100644 (file)
index 0000000..6af7cf7
--- /dev/null
@@ -0,0 +1,1188 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#undef DEBUG
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include <asm/perf_event.h>
+#include <asm/insn.h>
+#include <asm/io.h>
+#include <asm/intel_pt.h>
+
+#include "../perf_event.h"
+#include "pt.h"
+
+static DEFINE_PER_CPU(struct pt, pt_ctx);
+
+static struct pt_pmu pt_pmu;
+
+enum cpuid_regs {
+       CR_EAX = 0,
+       CR_ECX,
+       CR_EDX,
+       CR_EBX
+};
+
+/*
+ * Capabilities of Intel PT hardware, such as number of address bits or
+ * supported output schemes, are cached and exported to userspace as "caps"
+ * attribute group of pt pmu device
+ * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
+ * relevant bits together with intel_pt traces.
+ *
+ * These are necessary for both trace decoding (payloads_lip, contains address
+ * width encoded in IP-related packets), and event configuration (bitmasks with
+ * permitted values for certain bit fields).
+ */
+#define PT_CAP(_n, _l, _r, _m)                                         \
+       [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,        \
+                           .reg = _r, .mask = _m }
+
+static struct pt_cap_desc {
+       const char      *name;
+       u32             leaf;
+       u8              reg;
+       u32             mask;
+} pt_caps[] = {
+       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
+       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
+       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
+       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
+       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
+       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
+       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
+       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
+       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
+       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
+       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
+};
+
+static u32 pt_cap_get(enum pt_capabilities cap)
+{
+       struct pt_cap_desc *cd = &pt_caps[cap];
+       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
+       unsigned int shift = __ffs(cd->mask);
+
+       return (c & cd->mask) >> shift;
+}
+
+static ssize_t pt_cap_show(struct device *cdev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       struct dev_ext_attribute *ea =
+               container_of(attr, struct dev_ext_attribute, attr);
+       enum pt_capabilities cap = (long)ea->var;
+
+       return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
+}
+
+static struct attribute_group pt_cap_group = {
+       .name   = "caps",
+};
+
+PMU_FORMAT_ATTR(cyc,           "config:1"      );
+PMU_FORMAT_ATTR(mtc,           "config:9"      );
+PMU_FORMAT_ATTR(tsc,           "config:10"     );
+PMU_FORMAT_ATTR(noretcomp,     "config:11"     );
+PMU_FORMAT_ATTR(mtc_period,    "config:14-17"  );
+PMU_FORMAT_ATTR(cyc_thresh,    "config:19-22"  );
+PMU_FORMAT_ATTR(psb_period,    "config:24-27"  );
+
+static struct attribute *pt_formats_attr[] = {
+       &format_attr_cyc.attr,
+       &format_attr_mtc.attr,
+       &format_attr_tsc.attr,
+       &format_attr_noretcomp.attr,
+       &format_attr_mtc_period.attr,
+       &format_attr_cyc_thresh.attr,
+       &format_attr_psb_period.attr,
+       NULL,
+};
+
+static struct attribute_group pt_format_group = {
+       .name   = "format",
+       .attrs  = pt_formats_attr,
+};
+
+static const struct attribute_group *pt_attr_groups[] = {
+       &pt_cap_group,
+       &pt_format_group,
+       NULL,
+};
+
+static int __init pt_pmu_hw_init(void)
+{
+       struct dev_ext_attribute *de_attrs;
+       struct attribute **attrs;
+       size_t size;
+       int ret;
+       long i;
+
+       attrs = NULL;
+
+       for (i = 0; i < PT_CPUID_LEAVES; i++) {
+               cpuid_count(20, i,
+                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+       }
+
+       ret = -ENOMEM;
+       size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
+       attrs = kzalloc(size, GFP_KERNEL);
+       if (!attrs)
+               goto fail;
+
+       size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
+       de_attrs = kzalloc(size, GFP_KERNEL);
+       if (!de_attrs)
+               goto fail;
+
+       for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
+               struct dev_ext_attribute *de_attr = de_attrs + i;
+
+               de_attr->attr.attr.name = pt_caps[i].name;
+
+               sysfs_attr_init(&de_attr->attr.attr);
+
+               de_attr->attr.attr.mode         = S_IRUGO;
+               de_attr->attr.show              = pt_cap_show;
+               de_attr->var                    = (void *)i;
+
+               attrs[i] = &de_attr->attr.attr;
+       }
+
+       pt_cap_group.attrs = attrs;
+
+       return 0;
+
+fail:
+       kfree(attrs);
+
+       return ret;
+}
+
+#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC    | \
+                         RTIT_CTL_CYC_THRESH   | \
+                         RTIT_CTL_PSB_FREQ)
+
+#define RTIT_CTL_MTC   (RTIT_CTL_MTC_EN        | \
+                        RTIT_CTL_MTC_RANGE)
+
+#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN                | \
+                       RTIT_CTL_DISRETC        | \
+                       RTIT_CTL_CYC_PSB        | \
+                       RTIT_CTL_MTC)
+
+static bool pt_event_valid(struct perf_event *event)
+{
+       u64 config = event->attr.config;
+       u64 allowed, requested;
+
+       if ((config & PT_CONFIG_MASK) != config)
+               return false;
+
+       if (config & RTIT_CTL_CYC_PSB) {
+               if (!pt_cap_get(PT_CAP_psb_cyc))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_psb_periods);
+               requested = (config & RTIT_CTL_PSB_FREQ) >>
+                       RTIT_CTL_PSB_FREQ_OFFSET;
+               if (requested && (!(allowed & BIT(requested))))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_cycle_thresholds);
+               requested = (config & RTIT_CTL_CYC_THRESH) >>
+                       RTIT_CTL_CYC_THRESH_OFFSET;
+               if (requested && (!(allowed & BIT(requested))))
+                       return false;
+       }
+
+       if (config & RTIT_CTL_MTC) {
+               /*
+                * In the unlikely case that CPUID lists valid mtc periods,
+                * but not the mtc capability, drop out here.
+                *
+                * Spec says that setting mtc period bits while mtc bit in
+                * CPUID is 0 will #GP, so better safe than sorry.
+                */
+               if (!pt_cap_get(PT_CAP_mtc))
+                       return false;
+
+               allowed = pt_cap_get(PT_CAP_mtc_periods);
+               if (!allowed)
+                       return false;
+
+               requested = (config & RTIT_CTL_MTC_RANGE) >>
+                       RTIT_CTL_MTC_RANGE_OFFSET;
+
+               if (!(allowed & BIT(requested)))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * PT configuration helpers
+ * These are all cpu-affine and operate on a local PT
+ */
+
+static void pt_config(struct perf_event *event)
+{
+       u64 reg;
+
+       if (!event->hw.itrace_started) {
+               event->hw.itrace_started = 1;
+               wrmsrl(MSR_IA32_RTIT_STATUS, 0);
+       }
+
+       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+
+       if (!event->attr.exclude_kernel)
+               reg |= RTIT_CTL_OS;
+       if (!event->attr.exclude_user)
+               reg |= RTIT_CTL_USR;
+
+       reg |= (event->attr.config & PT_CONFIG_MASK);
+
+       wrmsrl(MSR_IA32_RTIT_CTL, reg);
+}
+
+static void pt_config_start(bool start)
+{
+       u64 ctl;
+
+       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
+       if (start)
+               ctl |= RTIT_CTL_TRACEEN;
+       else
+               ctl &= ~RTIT_CTL_TRACEEN;
+       wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+       /*
+        * A wrmsr that disables trace generation serializes other PT
+        * registers and causes all data packets to be written to memory,
+        * but a fence is required for the data to become globally visible.
+        *
+        * The below WMB, separating data store and aux_head store matches
+        * the consumer's RMB that separates aux_head load and data load.
+        */
+       if (!start)
+               wmb();
+}
+
+static void pt_config_buffer(void *buf, unsigned int topa_idx,
+                            unsigned int output_off)
+{
+       u64 reg;
+
+       wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
+
+       reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
+
+       wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+}
+
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
+
+/**
+ * struct topa - page-sized ToPA table with metadata at the top
+ * @table:     actual ToPA table entries, as understood by PT hardware
+ * @list:      linkage to struct pt_buffer's list of tables
+ * @phys:      physical address of this page
+ * @offset:    offset of the first entry in this table in the buffer
+ * @size:      total size of all entries in this table
+ * @last:      index of the last initialized entry in this table
+ */
+struct topa {
+       struct topa_entry       table[TENTS_PER_PAGE];
+       struct list_head        list;
+       u64                     phys;
+       u64                     offset;
+       size_t                  size;
+       int                     last;
+};
+
+/* make -1 stand for the last table entry */
+#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+
+/**
+ * topa_alloc() - allocate page-sized ToPA table
+ * @cpu:       CPU on which to allocate.
+ * @gfp:       Allocation flags.
+ *
+ * Return:     On success, return the pointer to ToPA table page.
+ */
+static struct topa *topa_alloc(int cpu, gfp_t gfp)
+{
+       int node = cpu_to_node(cpu);
+       struct topa *topa;
+       struct page *p;
+
+       p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
+       if (!p)
+               return NULL;
+
+       topa = page_address(p);
+       topa->last = 0;
+       topa->phys = page_to_phys(p);
+
+       /*
+        * In case of single-entry ToPA, always put the self-referencing END
+        * link as the 2nd entry in the table
+        */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
+               TOPA_ENTRY(topa, 1)->end = 1;
+       }
+
+       return topa;
+}
+
+/**
+ * topa_free() - free a page-sized ToPA table
+ * @topa:      Table to deallocate.
+ */
+static void topa_free(struct topa *topa)
+{
+       free_page((unsigned long)topa);
+}
+
+/**
+ * topa_insert_table() - insert a ToPA table into a buffer
+ * @buf:        PT buffer that's being extended.
+ * @topa:       New topa table to be inserted.
+ *
+ * If it's the first table in this buffer, set up buffer's pointers
+ * accordingly; otherwise, add an END=1 link entry pointing to @topa in the
+ * current "last" table and adjust the last table pointer to @topa.
+ */
+static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
+{
+       struct topa *last = buf->last;
+
+       list_add_tail(&topa->list, &buf->tables);
+
+       if (!buf->first) {
+               buf->first = buf->last = buf->cur = topa;
+               return;
+       }
+
+       topa->offset = last->offset + last->size;
+       buf->last = topa;
+
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return;
+
+       BUG_ON(last->last != TENTS_PER_PAGE - 1);
+
+       TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+       TOPA_ENTRY(last, -1)->end = 1;
+}
+
+/**
+ * topa_table_full() - check if a ToPA table is filled up
+ * @topa:      ToPA table.
+ */
+static bool topa_table_full(struct topa *topa)
+{
+       /* single-entry ToPA is a special case */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return !!topa->last;
+
+       return topa->last == TENTS_PER_PAGE - 1;
+}
+
+/**
+ * topa_insert_pages() - create a list of ToPA tables
+ * @buf:       PT buffer being initialized.
+ * @gfp:       Allocation flags.
+ *
+ * This initializes a list of ToPA tables with entries from
+ * the data_pages provided by rb_alloc_aux().
+ *
+ * Return:     0 on success or error code.
+ */
+static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+{
+       struct topa *topa = buf->last;
+       int order = 0;
+       struct page *p;
+
+       p = virt_to_page(buf->data_pages[buf->nr_pages]);
+       if (PagePrivate(p))
+               order = page_private(p);
+
+       if (topa_table_full(topa)) {
+               topa = topa_alloc(buf->cpu, gfp);
+               if (!topa)
+                       return -ENOMEM;
+
+               topa_insert_table(buf, topa);
+       }
+
+       TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
+       TOPA_ENTRY(topa, -1)->size = order;
+       if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(topa, -1)->intr = 1;
+               TOPA_ENTRY(topa, -1)->stop = 1;
+       }
+
+       topa->last++;
+       topa->size += sizes(order);
+
+       buf->nr_pages += 1ul << order;
+
+       return 0;
+}
+
+/**
+ * pt_topa_dump() - print ToPA tables and their entries
+ * @buf:       PT buffer.
+ */
+static void pt_topa_dump(struct pt_buffer *buf)
+{
+       struct topa *topa;
+
+       list_for_each_entry(topa, &buf->tables, list) {
+               int i;
+
+               pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
+                        topa->phys, topa->offset, topa->size);
+               for (i = 0; i < TENTS_PER_PAGE; i++) {
+                       pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
+                                &topa->table[i],
+                                (unsigned long)topa->table[i].base << TOPA_SHIFT,
+                                sizes(topa->table[i].size),
+                                topa->table[i].end ?  'E' : ' ',
+                                topa->table[i].intr ? 'I' : ' ',
+                                topa->table[i].stop ? 'S' : ' ',
+                                *(u64 *)&topa->table[i]);
+                       if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
+                            topa->table[i].stop) ||
+                           topa->table[i].end)
+                               break;
+               }
+       }
+}
+
+/**
+ * pt_buffer_advance() - advance to the next output region
+ * @buf:       PT buffer.
+ *
+ * Advance the current pointers in the buffer to the next ToPA entry.
+ */
+static void pt_buffer_advance(struct pt_buffer *buf)
+{
+       buf->output_off = 0;
+       buf->cur_idx++;
+
+       if (buf->cur_idx == buf->cur->last) {
+               if (buf->cur == buf->last)
+                       buf->cur = buf->first;
+               else
+                       buf->cur = list_entry(buf->cur->list.next, struct topa,
+                                             list);
+               buf->cur_idx = 0;
+       }
+}
+
+/**
+ * pt_update_head() - calculate current offsets and sizes
+ * @pt:                Per-cpu pt context.
+ *
+ * Update buffer's current write pointer position and data size.
+ */
+static void pt_update_head(struct pt *pt)
+{
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+       u64 topa_idx, base, old;
+
+       /* offset of the first region in this table from the beginning of buf */
+       base = buf->cur->offset + buf->output_off;
+
+       /* offset of the current output region within this table */
+       for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
+               base += sizes(buf->cur->table[topa_idx].size);
+
+       if (buf->snapshot) {
+               local_set(&buf->data_size, base);
+       } else {
+               old = (local64_xchg(&buf->head, base) &
+                      ((buf->nr_pages << PAGE_SHIFT) - 1));
+               if (base < old)
+                       base += buf->nr_pages << PAGE_SHIFT;
+
+               local_add(base - old, &buf->data_size);
+       }
+}
+
+/**
+ * pt_buffer_region() - obtain current output region's address
+ * @buf:       PT buffer.
+ */
+static void *pt_buffer_region(struct pt_buffer *buf)
+{
+       return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+}
+
+/**
+ * pt_buffer_region_size() - obtain current output region's size
+ * @buf:       PT buffer.
+ */
+static size_t pt_buffer_region_size(struct pt_buffer *buf)
+{
+       return sizes(buf->cur->table[buf->cur_idx].size);
+}
+
+/**
+ * pt_handle_status() - take care of possible status conditions
+ * @pt:                Per-cpu pt context.
+ */
+static void pt_handle_status(struct pt *pt)
+{
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+       int advance = 0;
+       u64 status;
+
+       rdmsrl(MSR_IA32_RTIT_STATUS, status);
+
+       if (status & RTIT_STATUS_ERROR) {
+               pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
+               pt_topa_dump(buf);
+               status &= ~RTIT_STATUS_ERROR;
+       }
+
+       if (status & RTIT_STATUS_STOPPED) {
+               status &= ~RTIT_STATUS_STOPPED;
+
+               /*
+                * On systems that only do single-entry ToPA, hitting STOP
+                * means we are already losing data; need to let the decoder
+                * know.
+                */
+               if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
+                   buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+                       local_inc(&buf->lost);
+                       advance++;
+               }
+       }
+
+       /*
+        * Also on single-entry ToPA implementations, the interrupt will come
+        * before the output reaches its output region's boundary.
+        */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
+           pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
+               void *head = pt_buffer_region(buf);
+
+               /* everything within this margin needs to be zeroed out */
+               memset(head + buf->output_off, 0,
+                      pt_buffer_region_size(buf) -
+                      buf->output_off);
+               advance++;
+       }
+
+       if (advance)
+               pt_buffer_advance(buf);
+
+       wrmsrl(MSR_IA32_RTIT_STATUS, status);
+}
+
+/**
+ * pt_read_offset() - translate registers into buffer pointers
+ * @buf:       PT buffer.
+ *
+ * Set buffer's output pointers from MSR values.
+ */
+static void pt_read_offset(struct pt_buffer *buf)
+{
+       u64 offset, base_topa;
+
+       rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
+       buf->cur = phys_to_virt(base_topa);
+
+       rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+       /* offset within current output region */
+       buf->output_off = offset >> 32;
+       /* index of current output region within this table */
+       buf->cur_idx = (offset & 0xffffff80) >> 7;
+}
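
To make the register layout above concrete: pt_config_buffer() packs MSR_IA32_RTIT_OUTPUT_MASK with 0x7f in the low bits, the ToPA entry index in bits 31:7, and the offset within the current output region in bits 63:32, and pt_read_offset() unpacks it the same way. The following stand-alone sketch of that round trip uses hypothetical values and is not part of the patch.

/* Stand-alone illustration of the OUTPUT_MASK pack/unpack round trip. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int topa_idx = 5, output_off = 0x1000;	/* hypothetical values */

	uint64_t reg = 0x7f | ((uint64_t)topa_idx << 7) | ((uint64_t)output_off << 32);

	printf("idx=%llu off=%#llx\n",
	       (unsigned long long)((reg & 0xffffff80) >> 7),	/* prints 5 */
	       (unsigned long long)(reg >> 32));		/* prints 0x1000 */
	return 0;
}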
+
+/**
+ * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
+ * @buf:       PT buffer.
+ * @pg:                Page offset in the buffer.
+ *
+ * When advancing to the next output region (ToPA entry), given a page offset
+ * into the buffer, we need to find the offset of the first page in the next
+ * region.
+ */
+static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+{
+       struct topa_entry *te = buf->topa_index[pg];
+
+       /* one region */
+       if (buf->first == buf->last && buf->first->last == 1)
+               return pg;
+
+       do {
+               pg++;
+               pg &= buf->nr_pages - 1;
+       } while (buf->topa_index[pg] == te);
+
+       return pg;
+}
+
+/**
+ * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
+ * @buf:       PT buffer.
+ * @handle:    Current output handle.
+ *
+ * Place INT and STOP marks to prevent overwriting old data that the consumer
+ * hasn't yet collected, and to wake up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
+ */
+static int pt_buffer_reset_markers(struct pt_buffer *buf,
+                                  struct perf_output_handle *handle)
+
+{
+       unsigned long head = local64_read(&buf->head);
+       unsigned long idx, npages, wakeup;
+
+       /* can't stop in the middle of an output region */
+       if (buf->output_off + handle->size + 1 <
+           sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
+               return -EINVAL;
+
+
+       /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               return 0;
+
+       /* clear STOP and INT from current entry */
+       buf->topa_index[buf->stop_pos]->stop = 0;
+       buf->topa_index[buf->intr_pos]->intr = 0;
+
+       /* how many pages till the STOP marker */
+       npages = handle->size >> PAGE_SHIFT;
+
+       /* if it's on a page boundary, fill up one more page */
+       if (!offset_in_page(head + handle->size + 1))
+               npages++;
+
+       idx = (head >> PAGE_SHIFT) + npages;
+       idx &= buf->nr_pages - 1;
+       buf->stop_pos = idx;
+
+       wakeup = handle->wakeup >> PAGE_SHIFT;
+
+       /* in the worst case, wake up the consumer one page before hard stop */
+       idx = (head >> PAGE_SHIFT) + npages - 1;
+       if (idx > wakeup)
+               idx = wakeup;
+
+       idx &= buf->nr_pages - 1;
+       buf->intr_pos = idx;
+
+       buf->topa_index[buf->stop_pos]->stop = 1;
+       buf->topa_index[buf->intr_pos]->intr = 1;
+
+       return 0;
+}
+
+/**
+ * pt_buffer_setup_topa_index() - build topa_index[] table of regions
+ * @buf:       PT buffer.
+ *
+ * topa_index[] references output regions indexed by offset into the
+ * buffer for purposes of quick reverse lookup.
+ */
+static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
+{
+       struct topa *cur = buf->first, *prev = buf->last;
+       struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
+               *te_prev = TOPA_ENTRY(prev, prev->last - 1);
+       int pg = 0, idx = 0;
+
+       while (pg < buf->nr_pages) {
+               int tidx;
+
+               /* pages within one topa entry */
+               for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
+                       buf->topa_index[pg] = te_prev;
+
+               te_prev = te_cur;
+
+               if (idx == cur->last - 1) {
+                       /* advance to next topa table */
+                       idx = 0;
+                       cur = list_entry(cur->list.next, struct topa, list);
+               } else {
+                       idx++;
+               }
+               te_cur = TOPA_ENTRY(cur, idx);
+       }
+
+}
+
+/**
+ * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
+ * @buf:       PT buffer.
+ * @head:      Write pointer (aux_head) from AUX buffer.
+ *
+ * Find the ToPA table and entry corresponding to given @head and set buffer's
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
+ */
+static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
+{
+       int pg;
+
+       if (buf->snapshot)
+               head &= (buf->nr_pages << PAGE_SHIFT) - 1;
+
+       pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+       pg = pt_topa_next_entry(buf, pg);
+
+       buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
+       buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
+                       (unsigned long)buf->cur) / sizeof(struct topa_entry);
+       buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+
+       local64_set(&buf->head, head);
+       local_set(&buf->data_size, 0);
+}
+
+/**
+ * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
+ * @buf:       PT buffer.
+ */
+static void pt_buffer_fini_topa(struct pt_buffer *buf)
+{
+       struct topa *topa, *iter;
+
+       list_for_each_entry_safe(topa, iter, &buf->tables, list) {
+               /*
+                * right now, this is in free_aux() path only, so
+                * no need to unlink this table from the list
+                */
+               topa_free(topa);
+       }
+}
+
+/**
+ * pt_buffer_init_topa() - initialize ToPA table for pt buffer
+ * @buf:       PT buffer.
+ * @size:      Total size of all regions within this ToPA.
+ * @gfp:       Allocation flags.
+ */
+static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
+                              gfp_t gfp)
+{
+       struct topa *topa;
+       int err;
+
+       topa = topa_alloc(buf->cpu, gfp);
+       if (!topa)
+               return -ENOMEM;
+
+       topa_insert_table(buf, topa);
+
+       while (buf->nr_pages < nr_pages) {
+               err = topa_insert_pages(buf, gfp);
+               if (err) {
+                       pt_buffer_fini_topa(buf);
+                       return -ENOMEM;
+               }
+       }
+
+       pt_buffer_setup_topa_index(buf);
+
+       /* link last table to the first one, unless we're double buffering */
+       if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
+               TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+               TOPA_ENTRY(buf->last, -1)->end = 1;
+       }
+
+       pt_topa_dump(buf);
+       return 0;
+}
+
+/**
+ * pt_buffer_setup_aux() - set up topa tables for a PT buffer
+ * @cpu:       Cpu on which to allocate, -1 means current.
+ * @pages:     Array of pointers to buffer pages passed from perf core.
+ * @nr_pages:  Number of pages in the buffer.
+ * @snapshot:  If this is a snapshot/overwrite counter.
+ *
+ * This is a pmu::setup_aux callback that sets up ToPA tables and all the
+ * bookkeeping for an AUX buffer.
+ *
+ * Return:     Our private PT buffer structure.
+ */
+static void *
+pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+{
+       struct pt_buffer *buf;
+       int node, ret;
+
+       if (!nr_pages)
+               return NULL;
+
+       if (cpu == -1)
+               cpu = raw_smp_processor_id();
+       node = cpu_to_node(cpu);
+
+       buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
+                          GFP_KERNEL, node);
+       if (!buf)
+               return NULL;
+
+       buf->cpu = cpu;
+       buf->snapshot = snapshot;
+       buf->data_pages = pages;
+
+       INIT_LIST_HEAD(&buf->tables);
+
+       ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+       if (ret) {
+               kfree(buf);
+               return NULL;
+       }
+
+       return buf;
+}
+
+/**
+ * pt_buffer_free_aux() - perf AUX deallocation path callback
+ * @data:      PT buffer.
+ */
+static void pt_buffer_free_aux(void *data)
+{
+       struct pt_buffer *buf = data;
+
+       pt_buffer_fini_topa(buf);
+       kfree(buf);
+}
+
+/**
+ * pt_buffer_is_full() - check if the buffer is full
+ * @buf:       PT buffer.
+ * @pt:                Per-cpu pt handle.
+ *
+ * If the user hasn't read data from the output region that aux_head
+ * points to, the buffer is considered full: the user needs to read at
+ * least this region and update aux_tail to point past it.
+ */
+static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+{
+       if (buf->snapshot)
+               return false;
+
+       if (local_read(&buf->data_size) >= pt->handle.size)
+               return true;
+
+       return false;
+}
+
+/**
+ * intel_pt_interrupt() - PT PMI handler
+ */
+void intel_pt_interrupt(void)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf;
+       struct perf_event *event = pt->handle.event;
+
+       /*
+        * There may be a dangling PT bit in the interrupt status register
+        * after PT has been disabled by pt_event_stop(). Make sure we don't
+        * do anything (particularly, re-enable) for this event here.
+        */
+       if (!ACCESS_ONCE(pt->handle_nmi))
+               return;
+
+       pt_config_start(false);
+
+       if (!event)
+               return;
+
+       buf = perf_get_aux(&pt->handle);
+       if (!buf)
+               return;
+
+       pt_read_offset(buf);
+
+       pt_handle_status(pt);
+
+       pt_update_head(pt);
+
+       perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+                           local_xchg(&buf->lost, 0));
+
+       if (!event->hw.state) {
+               int ret;
+
+               buf = perf_aux_output_begin(&pt->handle, event);
+               if (!buf) {
+                       event->hw.state = PERF_HES_STOPPED;
+                       return;
+               }
+
+               pt_buffer_reset_offsets(buf, pt->handle.head);
+               /* snapshot counters don't use PMI, so it's safe */
+               ret = pt_buffer_reset_markers(buf, &pt->handle);
+               if (ret) {
+                       perf_aux_output_end(&pt->handle, 0, true);
+                       return;
+               }
+
+               pt_config_buffer(buf->cur->table, buf->cur_idx,
+                                buf->output_off);
+               pt_config(event);
+       }
+}
+
+/*
+ * PMU callbacks
+ */
+
+static void pt_event_start(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+       if (!buf || pt_buffer_is_full(buf, pt)) {
+               event->hw.state = PERF_HES_STOPPED;
+               return;
+       }
+
+       ACCESS_ONCE(pt->handle_nmi) = 1;
+       event->hw.state = 0;
+
+       pt_config_buffer(buf->cur->table, buf->cur_idx,
+                        buf->output_off);
+       pt_config(event);
+}
+
+static void pt_event_stop(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+       /*
+        * Protect against the PMI racing with disabling wrmsr,
+        * see comment in intel_pt_interrupt().
+        */
+       ACCESS_ONCE(pt->handle_nmi) = 0;
+       pt_config_start(false);
+
+       if (event->hw.state == PERF_HES_STOPPED)
+               return;
+
+       event->hw.state = PERF_HES_STOPPED;
+
+       if (mode & PERF_EF_UPDATE) {
+               struct pt_buffer *buf = perf_get_aux(&pt->handle);
+
+               if (!buf)
+                       return;
+
+               if (WARN_ON_ONCE(pt->handle.event != event))
+                       return;
+
+               pt_read_offset(buf);
+
+               pt_handle_status(pt);
+
+               pt_update_head(pt);
+       }
+}
+
+static void pt_event_del(struct perf_event *event, int mode)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct pt_buffer *buf;
+
+       pt_event_stop(event, PERF_EF_UPDATE);
+
+       buf = perf_get_aux(&pt->handle);
+
+       if (buf) {
+               if (buf->snapshot)
+                       pt->handle.head =
+                               local_xchg(&buf->data_size,
+                                          buf->nr_pages << PAGE_SHIFT);
+               perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
+                                   local_xchg(&buf->lost, 0));
+       }
+}
+
+static int pt_event_add(struct perf_event *event, int mode)
+{
+       struct pt_buffer *buf;
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct hw_perf_event *hwc = &event->hw;
+       int ret = -EBUSY;
+
+       if (pt->handle.event)
+               goto fail;
+
+       buf = perf_aux_output_begin(&pt->handle, event);
+       ret = -EINVAL;
+       if (!buf)
+               goto fail_stop;
+
+       pt_buffer_reset_offsets(buf, pt->handle.head);
+       if (!buf->snapshot) {
+               ret = pt_buffer_reset_markers(buf, &pt->handle);
+               if (ret)
+                       goto fail_end_stop;
+       }
+
+       if (mode & PERF_EF_START) {
+               pt_event_start(event, 0);
+               ret = -EBUSY;
+               if (hwc->state == PERF_HES_STOPPED)
+                       goto fail_end_stop;
+       } else {
+               hwc->state = PERF_HES_STOPPED;
+       }
+
+       return 0;
+
+fail_end_stop:
+       perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+       hwc->state = PERF_HES_STOPPED;
+fail:
+       return ret;
+}
+
+static void pt_event_read(struct perf_event *event)
+{
+}
+
+static void pt_event_destroy(struct perf_event *event)
+{
+       x86_del_exclusive(x86_lbr_exclusive_pt);
+}
+
+static int pt_event_init(struct perf_event *event)
+{
+       if (event->attr.type != pt_pmu.pmu.type)
+               return -ENOENT;
+
+       if (!pt_event_valid(event))
+               return -EINVAL;
+
+       if (x86_add_exclusive(x86_lbr_exclusive_pt))
+               return -EBUSY;
+
+       event->destroy = pt_event_destroy;
+
+       return 0;
+}
+
+void cpu_emergency_stop_pt(void)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+
+       if (pt->handle.event)
+               pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
+}
+
+static __init int pt_init(void)
+{
+       int ret, cpu, prior_warn = 0;
+
+       BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
+
+       if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+               return -ENODEV;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               u64 ctl;
+
+               ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
+               if (!ret && (ctl & RTIT_CTL_TRACEEN))
+                       prior_warn++;
+       }
+       put_online_cpus();
+
+       if (prior_warn) {
+               x86_add_exclusive(x86_lbr_exclusive_pt);
+               pr_warn("PT is enabled at boot time, doing nothing\n");
+
+               return -EBUSY;
+       }
+
+       ret = pt_pmu_hw_init();
+       if (ret)
+               return ret;
+
+       if (!pt_cap_get(PT_CAP_topa_output)) {
+               pr_warn("ToPA output is not supported on this CPU\n");
+               return -ENODEV;
+       }
+
+       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
+               pt_pmu.pmu.capabilities =
+                       PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+
+       pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
+       pt_pmu.pmu.attr_groups  = pt_attr_groups;
+       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
+       pt_pmu.pmu.event_init   = pt_event_init;
+       pt_pmu.pmu.add          = pt_event_add;
+       pt_pmu.pmu.del          = pt_event_del;
+       pt_pmu.pmu.start        = pt_event_start;
+       pt_pmu.pmu.stop         = pt_event_stop;
+       pt_pmu.pmu.read         = pt_event_read;
+       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
+       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
+       ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+
+       return ret;
+}
+arch_initcall(pt_init);
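
As a side note on pt_cap_get() above: each capability is described by a CPUID leaf/register plus a mask, and its value is extracted by masking and shifting down by the mask's lowest set bit. The sketch below shows that extraction for the mtc_periods capability (CPUID leaf 0x14, subleaf 1, EAX, mask 0xffff0000, per pt_caps[]). The CPUID word is hypothetical and __builtin_ctz stands in for the kernel's __ffs(); this is an illustration, not part of the patch.

/* Stand-alone illustration of the mask-and-shift capability extraction. */
#include <stdio.h>
#include <stdint.h>

static uint32_t extract_cap(uint32_t reg, uint32_t mask)
{
	unsigned int shift = __builtin_ctz(mask);	/* same role as __ffs() */

	return (reg & mask) >> shift;
}

int main(void)
{
	uint32_t eax_subleaf1 = 0x00730007;	/* hypothetical CPUID.(0x14,1):EAX */

	/* mtc_periods lives in bits 31:16 (mask 0xffff0000 in pt_caps[]) */
	printf("mtc_periods bitmap = 0x%x\n",
	       (unsigned int)extract_cap(eax_subleaf1, 0xffff0000));	/* prints 0x73 */
	return 0;
}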
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
new file mode 100644 (file)
index 0000000..336878a
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * Intel(R) Processor Trace PMU driver for perf
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * Intel PT is specified in the Intel Architecture Instruction Set Extensions
+ * Programming Reference:
+ * http://software.intel.com/en-us/intel-isa-extensions
+ */
+
+#ifndef __INTEL_PT_H__
+#define __INTEL_PT_H__
+
+/*
+ * Single-entry ToPA: when this close to region boundary, switch
+ * buffers to avoid losing data.
+ */
+#define TOPA_PMI_MARGIN 512
+
+#define TOPA_SHIFT 12
+
+static inline unsigned int sizes(unsigned int tsz)
+{
+       return 1 << (tsz + TOPA_SHIFT);
+};
+
+struct topa_entry {
+       u64     end     : 1;
+       u64     rsvd0   : 1;
+       u64     intr    : 1;
+       u64     rsvd1   : 1;
+       u64     stop    : 1;
+       u64     rsvd2   : 1;
+       u64     size    : 4;
+       u64     rsvd3   : 2;
+       u64     base    : 36;
+       u64     rsvd4   : 16;
+};
+
+#define PT_CPUID_LEAVES                2
+#define PT_CPUID_REGS_NUM      4 /* number of registers (eax, ebx, ecx, edx) */
+
+enum pt_capabilities {
+       PT_CAP_max_subleaf = 0,
+       PT_CAP_cr3_filtering,
+       PT_CAP_psb_cyc,
+       PT_CAP_mtc,
+       PT_CAP_topa_output,
+       PT_CAP_topa_multiple_entries,
+       PT_CAP_single_range_output,
+       PT_CAP_payloads_lip,
+       PT_CAP_mtc_periods,
+       PT_CAP_cycle_thresholds,
+       PT_CAP_psb_periods,
+};
+
+struct pt_pmu {
+       struct pmu              pmu;
+       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+};
+
+/**
+ * struct pt_buffer - buffer configuration; one buffer per task_struct or
+ *             cpu, depending on perf event configuration
+ * @cpu:       cpu for per-cpu allocation
+ * @tables:    list of ToPA tables in this buffer
+ * @first:     shorthand for first topa table
+ * @last:      shorthand for last topa table
+ * @cur:       current topa table
+ * @nr_pages:  buffer size in pages
+ * @cur_idx:   current output region's index within @cur table
+ * @output_off:        offset within the current output region
+ * @data_size: running total of the amount of data in this buffer
+ * @lost:      if data was lost/truncated
+ * @head:      logical write offset inside the buffer
+ * @snapshot:  if this is for a snapshot/overwrite counter
+ * @stop_pos:  STOP topa entry in the buffer
+ * @intr_pos:  INT topa entry in the buffer
+ * @data_pages:        array of pages from perf
+ * @topa_index:        table of topa entries indexed by page offset
+ */
+struct pt_buffer {
+       int                     cpu;
+       struct list_head        tables;
+       struct topa             *first, *last, *cur;
+       unsigned int            cur_idx;
+       size_t                  output_off;
+       unsigned long           nr_pages;
+       local_t                 data_size;
+       local_t                 lost;
+       local64_t               head;
+       bool                    snapshot;
+       unsigned long           stop_pos, intr_pos;
+       void                    **data_pages;
+       struct topa_entry       *topa_index[0];
+};
+
+/**
+ * struct pt - per-cpu pt context
+ * @handle:    perf output handle
+ * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
+ */
+struct pt {
+       struct perf_output_handle handle;
+       int                     handle_nmi;
+};
+
+#endif /* __INTEL_PT_H__ */
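
To illustrate the topa_entry encoding defined above: 'base' is a page frame number (shifted left by TOPA_SHIFT to form the physical address) and 'size' is a power-of-two exponent on top of a 4 KiB page, as computed by sizes(). The field values in the sketch below are hypothetical; this is an editorial illustration, not part of the patch.

/* Stand-alone illustration of how topa_entry fields map to an output region. */
#include <stdio.h>
#include <stdint.h>

#define TOPA_SHIFT 12

static unsigned int sizes(unsigned int tsz)
{
	return 1 << (tsz + TOPA_SHIFT);		/* same helper as in pt.h */
}

int main(void)
{
	uint64_t base = 0x12345;	/* hypothetical page frame number */
	unsigned int size = 3;		/* 4 KiB << 3 = 32 KiB region */

	printf("region at %#llx, %u bytes\n",
	       (unsigned long long)(base << TOPA_SHIFT), sizes(size));
	return 0;
}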
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
new file mode 100644 (file)
index 0000000..b834a3f
--- /dev/null
@@ -0,0 +1,767 @@
+/*
+ * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
+ * Copyright (C) 2013 Google, Inc., Stephane Eranian
+ *
+ * Intel RAPL interface is specified in the IA-32 Manual Vol3b
+ * section 14.7.1 (September 2013)
+ *
+ * RAPL provides more controls than just reporting energy consumption;
+ * however, here we only expose the free-running energy consumption
+ * counters (pp0, pkg, dram, pp1/gpu).
+ *
+ * Each of those counters increments in a power unit defined by the
+ * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
+ * but it can vary.
+ *
+ * Counter to rapl events mappings:
+ *
+ *  pp0 counter: consumption of all physical cores (power plane 0)
+ *       event: rapl_energy_cores
+ *    perf code: 0x1
+ *
+ *  pkg counter: consumption of the whole processor package
+ *       event: rapl_energy_pkg
+ *    perf code: 0x2
+ *
+ * dram counter: consumption of the dram domain (servers only)
+ *       event: rapl_energy_dram
+ *    perf code: 0x3
+ *
+ *  pp1 counter: consumption of the builtin-gpu domain (client only)
+ *       event: rapl_energy_gpu
+ *    perf code: 0x4
+ *
+ * We manage those counters as free running (read-only). They may be
+ * used simultaneously by other tools, such as turbostat.
+ *
+ * The events only support system-wide mode counting. There is no
+ * sampling support because it does not make sense and is not
+ * supported by the RAPL hardware.
+ *
+ * Because we want to avoid floating-point operations in the kernel,
+ * the events are all reported in fixed point arithmetic (32.32).
+ * Tools must adjust the counts to convert them to Watts using
+ * the duration of the measurement. Tools may use a function such as
+ * ldexp(raw_count, -32);
+ */
+
+#define pr_fmt(fmt) "RAPL PMU: " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+#include <asm/cpu_device_id.h>
+#include "../perf_event.h"
+
+/*
+ * RAPL energy status counters
+ */
+#define RAPL_IDX_PP0_NRG_STAT  0       /* all cores */
+#define INTEL_RAPL_PP0         0x1     /* pseudo-encoding */
+#define RAPL_IDX_PKG_NRG_STAT  1       /* entire package */
+#define INTEL_RAPL_PKG         0x2     /* pseudo-encoding */
+#define RAPL_IDX_RAM_NRG_STAT  2       /* DRAM */
+#define INTEL_RAPL_RAM         0x3     /* pseudo-encoding */
+#define RAPL_IDX_PP1_NRG_STAT  3       /* gpu */
+#define INTEL_RAPL_PP1         0x4     /* pseudo-encoding */
+
+#define NR_RAPL_DOMAINS         0x4
+static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
+       "pp0-core",
+       "package",
+       "dram",
+       "pp1-gpu",
+};
+
+/* Clients have PP0, PKG */
+#define RAPL_IDX_CLN   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM */
+#define RAPL_IDX_SRV   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT)
+
+/* Servers have PP0, PKG, RAM, PP1 */
+#define RAPL_IDX_HSW   (1<<RAPL_IDX_PP0_NRG_STAT|\
+                        1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT|\
+                        1<<RAPL_IDX_PP1_NRG_STAT)
+
+/* Knights Landing has PKG, RAM */
+#define RAPL_IDX_KNL   (1<<RAPL_IDX_PKG_NRG_STAT|\
+                        1<<RAPL_IDX_RAM_NRG_STAT)
+
+/*
+ * event code: LSB 8 bits, passed in attr->config
+ * any other bit is reserved
+ */
+#define RAPL_EVENT_MASK        0xFFULL
+
+#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)          \
+static ssize_t __rapl_##_var##_show(struct kobject *kobj,      \
+                               struct kobj_attribute *attr,    \
+                               char *page)                     \
+{                                                              \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
+       return sprintf(page, _format "\n");                     \
+}                                                              \
+static struct kobj_attribute format_attr_##_var =              \
+       __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
+
+#define RAPL_CNTR_WIDTH 32
+
+#define RAPL_EVENT_ATTR_STR(_name, v, str)                                     \
+static struct perf_pmu_events_attr event_attr_##v = {                          \
+       .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),     \
+       .id             = 0,                                                    \
+       .event_str      = str,                                                  \
+};
+
+struct rapl_pmu {
+       raw_spinlock_t          lock;
+       int                     n_active;
+       int                     cpu;
+       struct list_head        active_list;
+       struct pmu              *pmu;
+       ktime_t                 timer_interval;
+       struct hrtimer          hrtimer;
+};
+
+struct rapl_pmus {
+       struct pmu              pmu;
+       unsigned int            maxpkg;
+       struct rapl_pmu         *pmus[];
+};
+
+/* 1/2^hw_unit Joule */
+static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
+static struct rapl_pmus *rapl_pmus;
+static cpumask_t rapl_cpu_mask;
+static unsigned int rapl_cntr_mask;
+static u64 rapl_timer_ms;
+
+static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
+{
+       return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+}
+
+static inline u64 rapl_read_counter(struct perf_event *event)
+{
+       u64 raw;
+       rdmsrl(event->hw.event_base, raw);
+       return raw;
+}
+
+static inline u64 rapl_scale(u64 v, int cfg)
+{
+       if (cfg > NR_RAPL_DOMAINS) {
+               pr_warn("Invalid domain %d, failed to scale data\n", cfg);
+               return v;
+       }
+       /*
+        * scale delta to smallest unit (1/2^32)
+        * users must then scale back: count * 1/2^32 to get Joules
+        * or use ldexp(count, -32).
+        * Watts = Joules/Time delta
+        */
+       return v << (32 - rapl_hw_unit[cfg - 1]);
+}
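
For clarity on the 32.32 fixed-point scheme described in the header comment: rapl_scale() shifts raw increments so that one exported count equals 1/2^32 Joule, and user space converts back with the sysfs scale value (2^-32), for example via ldexp(). Below is a stand-alone sketch of that user-space conversion with a made-up count (link with -lm); it is an illustration, not part of the patch.

/* Stand-alone illustration of converting a scaled RAPL count to Joules. */
#include <stdio.h>
#include <math.h>

int main(void)
{
	unsigned long long count = 858993459200ULL;	/* hypothetical scaled count */
	double joules = ldexp((double)count, -32);	/* count * 2^-32 */

	printf("%.3f J\n", joules);			/* prints 200.000 J */
	return 0;
}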
+
+static u64 rapl_event_update(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       u64 prev_raw_count, new_raw_count;
+       s64 delta, sdelta;
+       int shift = RAPL_CNTR_WIDTH;
+
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       rdmsrl(event->hw.event_base, new_raw_count);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                           new_raw_count) != prev_raw_count) {
+               cpu_relax();
+               goto again;
+       }
+
+       /*
+        * Now we have the new raw value and have updated the prev
+        * timestamp already. We can now calculate the elapsed delta
+        * (event-)time and add that to the generic event.
+        *
+        * Careful, not all hw sign-extends above the physical width
+        * of the count.
+        */
+       delta = (new_raw_count << shift) - (prev_raw_count << shift);
+       delta >>= shift;
+
+       sdelta = rapl_scale(delta, event->hw.config);
+
+       local64_add(sdelta, &event->count);
+
+       return new_raw_count;
+}
+
+static void rapl_start_hrtimer(struct rapl_pmu *pmu)
+{
+       hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
+                    HRTIMER_MODE_REL_PINNED);
+}
+
+static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+       struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
+       struct perf_event *event;
+       unsigned long flags;
+
+       if (!pmu->n_active)
+               return HRTIMER_NORESTART;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       list_for_each_entry(event, &pmu->active_list, active_entry)
+               rapl_event_update(event);
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+       hrtimer_forward_now(hrtimer, pmu->timer_interval);
+
+       return HRTIMER_RESTART;
+}
+
+static void rapl_hrtimer_init(struct rapl_pmu *pmu)
+{
+       struct hrtimer *hr = &pmu->hrtimer;
+
+       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       hr->function = rapl_hrtimer_handle;
+}
+
+static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
+                                  struct perf_event *event)
+{
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       event->hw.state = 0;
+
+       list_add_tail(&event->active_entry, &pmu->active_list);
+
+       local64_set(&event->hw.prev_count, rapl_read_counter(event));
+
+       pmu->n_active++;
+       if (pmu->n_active == 1)
+               rapl_start_hrtimer(pmu);
+}
+
+static void rapl_pmu_event_start(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+       __rapl_pmu_event_start(pmu, event);
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static void rapl_pmu_event_stop(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       /* mark event as deactivated and stopped */
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               WARN_ON_ONCE(pmu->n_active <= 0);
+               pmu->n_active--;
+               if (pmu->n_active == 0)
+                       hrtimer_cancel(&pmu->hrtimer);
+
+               list_del(&event->active_entry);
+
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+       }
+
+       /* check if update of sw counter is necessary */
+       if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               rapl_event_update(event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+}
+
+static int rapl_pmu_event_add(struct perf_event *event, int mode)
+{
+       struct rapl_pmu *pmu = event->pmu_private;
+       struct hw_perf_event *hwc = &event->hw;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pmu->lock, flags);
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+       if (mode & PERF_EF_START)
+               __rapl_pmu_event_start(pmu, event);
+
+       raw_spin_unlock_irqrestore(&pmu->lock, flags);
+
+       return 0;
+}
+
+static void rapl_pmu_event_del(struct perf_event *event, int flags)
+{
+       rapl_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int rapl_pmu_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config & RAPL_EVENT_MASK;
+       int bit, msr, ret = 0;
+       struct rapl_pmu *pmu;
+
+       /* only look at RAPL events */
+       if (event->attr.type != rapl_pmus->pmu.type)
+               return -ENOENT;
+
+       /* check only supported bits are set */
+       if (event->attr.config & ~RAPL_EVENT_MASK)
+               return -EINVAL;
+
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       /*
+        * check event is known (determines counter)
+        */
+       switch (cfg) {
+       case INTEL_RAPL_PP0:
+               bit = RAPL_IDX_PP0_NRG_STAT;
+               msr = MSR_PP0_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_PKG:
+               bit = RAPL_IDX_PKG_NRG_STAT;
+               msr = MSR_PKG_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_RAM:
+               bit = RAPL_IDX_RAM_NRG_STAT;
+               msr = MSR_DRAM_ENERGY_STATUS;
+               break;
+       case INTEL_RAPL_PP1:
+               bit = RAPL_IDX_PP1_NRG_STAT;
+               msr = MSR_PP1_ENERGY_STATUS;
+               break;
+       default:
+               return -EINVAL;
+       }
+       /* check event supported */
+       if (!(rapl_cntr_mask & (1 << bit)))
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       /* must be done before validate_group */
+       pmu = cpu_to_rapl_pmu(event->cpu);
+       event->cpu = pmu->cpu;
+       event->pmu_private = pmu;
+       event->hw.event_base = msr;
+       event->hw.config = cfg;
+       event->hw.idx = bit;
+
+       return ret;
+}
+
+static void rapl_pmu_event_read(struct perf_event *event)
+{
+       rapl_event_update(event);
+}
+
+static ssize_t rapl_get_attr_cpumask(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
+
+static struct attribute *rapl_pmu_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_attr_group = {
+       .attrs = rapl_pmu_attrs,
+};
+
+RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
+RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
+RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
+RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+
+RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+
+/*
+ * all counts are scaled to 2^-32 Joules (~0.23 nJ) regardless of the MSR's native unit
+ */
+RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+
+static struct attribute *rapl_events_srv_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_cln_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_gpu),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_gpu_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_gpu_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_hsw_attr[] = {
+       EVENT_PTR(rapl_cores),
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_gpu),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_cores_unit),
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_gpu_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_cores_scale),
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_gpu_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute *rapl_events_knl_attr[] = {
+       EVENT_PTR(rapl_pkg),
+       EVENT_PTR(rapl_ram),
+
+       EVENT_PTR(rapl_pkg_unit),
+       EVENT_PTR(rapl_ram_unit),
+
+       EVENT_PTR(rapl_pkg_scale),
+       EVENT_PTR(rapl_ram_scale),
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_events_group = {
+       .name = "events",
+       .attrs = NULL, /* patched at runtime */
+};
+
+DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+static struct attribute *rapl_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group rapl_pmu_format_group = {
+       .name = "format",
+       .attrs = rapl_formats_attr,
+};
+
+const struct attribute_group *rapl_attr_groups[] = {
+       &rapl_pmu_attr_group,
+       &rapl_pmu_format_group,
+       &rapl_pmu_events_group,
+       NULL,
+};
+
+static void rapl_cpu_exit(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+       int target;
+
+       /* Check if exiting cpu is used for collecting rapl events */
+       if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
+               return;
+
+       pmu->cpu = -1;
+       /* Find a new cpu to collect rapl events */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       /* Migrate rapl events to the new target */
+       if (target < nr_cpu_ids) {
+               cpumask_set_cpu(target, &rapl_cpu_mask);
+               pmu->cpu = target;
+               perf_pmu_migrate_context(pmu->pmu, cpu, target);
+       }
+}
+
+static void rapl_cpu_init(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+       int target;
+
+       /*
+        * Check if there is an online cpu in the package which collects rapl
+        * events already.
+        */
+       target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+       if (target < nr_cpu_ids)
+               return;
+
+       cpumask_set_cpu(cpu, &rapl_cpu_mask);
+       pmu->cpu = cpu;
+}
+
+static int rapl_cpu_prepare(int cpu)
+{
+       struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
+
+       if (pmu)
+               return 0;
+
+       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+       if (!pmu)
+               return -ENOMEM;
+
+       raw_spin_lock_init(&pmu->lock);
+       INIT_LIST_HEAD(&pmu->active_list);
+       pmu->pmu = &rapl_pmus->pmu;
+       pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+       pmu->cpu = -1;
+       rapl_hrtimer_init(pmu);
+       rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+       return 0;
+}
+
+static int rapl_cpu_notifier(struct notifier_block *self,
+                            unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               rapl_cpu_prepare(cpu);
+               break;
+
+       case CPU_DOWN_FAILED:
+       case CPU_ONLINE:
+               rapl_cpu_init(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               rapl_cpu_exit(cpu);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static int rapl_check_hw_unit(bool apply_quirk)
+{
+       u64 msr_rapl_power_unit_bits;
+       int i;
+
+       /* rdmsrl_safe() handles a missing MSR, e.g. under virtualization */
+       if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+               return -1;
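+
+       /*
+        * Bits 12:8 of MSR_RAPL_POWER_UNIT hold the Energy Status Unit
+        * exponent: one raw counter increment corresponds to 2^-ESU Joules.
+        */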
+       for (i = 0; i < NR_RAPL_DOMAINS; i++)
+               rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
+
+       /*
+        * The DRAM domain on HSW servers and KNL has a fixed energy unit
+        * which can differ from the unit reported by the power unit MSR. See
+        * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
+        * of 2. Datasheet, September 2014, Reference Number: 330784-001".
+        */
+       if (apply_quirk)
+               rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
+
+       /*
+        * Calculate the timer rate:
+        * Use a reference of 200W for scaling the timeout to avoid counter
+        * overflows. 200W = 200 Joules/sec.
+        * Divide the interval by 2 to avoid lockstep (2 * 100).
+        * If the hw unit is 32 (2^-32 Joules per count), the counter wraps
+        * after 1 Joule, i.e. 5 ms at 200 W, so we use 2 ms (1/200/2).
+        */
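+       /*
+        * Worked example (assuming the typical energy status unit of
+        * 2^-16 Joules): the 32 bit counter wraps after 2^32 * 2^-16 =
+        * 65536 Joules, i.e. ~327 s at 200 W, so the timer fires roughly
+        * every 164 s (5 ms * 2^15 = 163840 ms).
+        */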
+       rapl_timer_ms = 2;
+       if (rapl_hw_unit[0] < 32) {
+               rapl_timer_ms = (1000 / (2 * 100));
+               rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
+       }
+       return 0;
+}
+
+static void __init rapl_advertise(void)
+{
+       int i;
+
+       pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
+               hweight32(rapl_cntr_mask), rapl_timer_ms);
+
+       for (i = 0; i < NR_RAPL_DOMAINS; i++) {
+               if (rapl_cntr_mask & (1 << i)) {
+                       pr_info("hw unit of domain %s 2^-%d Joules\n",
+                               rapl_domain_names[i], rapl_hw_unit[i]);
+               }
+       }
+}
+
+static int __init rapl_prepare_cpus(void)
+{
+       unsigned int cpu, pkg;
+       int ret;
+
+       for_each_online_cpu(cpu) {
+               pkg = topology_logical_package_id(cpu);
+               if (rapl_pmus->pmus[pkg])
+                       continue;
+
+               ret = rapl_cpu_prepare(cpu);
+               if (ret)
+                       return ret;
+               rapl_cpu_init(cpu);
+       }
+       return 0;
+}
+
+static void __init cleanup_rapl_pmus(void)
+{
+       int i;
+
+       for (i = 0; i < rapl_pmus->maxpkg; i++)
+               kfree(rapl_pmus->pmus + i);
+       kfree(rapl_pmus);
+}
+
+static int __init init_rapl_pmus(void)
+{
+       int maxpkg = topology_max_packages();
+       size_t size;
+
+       size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+       rapl_pmus = kzalloc(size, GFP_KERNEL);
+       if (!rapl_pmus)
+               return -ENOMEM;
+
+       rapl_pmus->maxpkg               = maxpkg;
+       rapl_pmus->pmu.attr_groups      = rapl_attr_groups;
+       rapl_pmus->pmu.task_ctx_nr      = perf_invalid_context;
+       rapl_pmus->pmu.event_init       = rapl_pmu_event_init;
+       rapl_pmus->pmu.add              = rapl_pmu_event_add;
+       rapl_pmus->pmu.del              = rapl_pmu_event_del;
+       rapl_pmus->pmu.start            = rapl_pmu_event_start;
+       rapl_pmus->pmu.stop             = rapl_pmu_event_stop;
+       rapl_pmus->pmu.read             = rapl_pmu_event_read;
+       return 0;
+}
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
+       [1] = {},
+};
+
+static int __init rapl_pmu_init(void)
+{
+       bool apply_quirk = false;
+       int ret;
+
+       if (!x86_match_cpu(rapl_cpu_match))
+               return -ENODEV;
+
+       switch (boot_cpu_data.x86_model) {
+       case 42: /* Sandy Bridge */
+       case 58: /* Ivy Bridge */
+               rapl_cntr_mask = RAPL_IDX_CLN;
+               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
+               break;
+       case 63: /* Haswell-Server */
+               apply_quirk = true;
+               rapl_cntr_mask = RAPL_IDX_SRV;
+               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+               break;
+       case 60: /* Haswell */
+       case 69: /* Haswell-Celeron */
+       case 61: /* Broadwell */
+               rapl_cntr_mask = RAPL_IDX_HSW;
+               rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
+               break;
+       case 45: /* Sandy Bridge-EP */
+       case 62: /* IvyTown */
+               rapl_cntr_mask = RAPL_IDX_SRV;
+               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
+               break;
+       case 87: /* Knights Landing */
+               apply_quirk = true;
+               rapl_cntr_mask = RAPL_IDX_KNL;
+               rapl_pmu_events_group.attrs = rapl_events_knl_attr;
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       ret = rapl_check_hw_unit(apply_quirk);
+       if (ret)
+               return ret;
+
+       ret = init_rapl_pmus();
+       if (ret)
+               return ret;
+
+       cpu_notifier_register_begin();
+
+       ret = rapl_prepare_cpus();
+       if (ret)
+               goto out;
+
+       ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
+       if (ret)
+               goto out;
+
+       __perf_cpu_notifier(rapl_cpu_notifier);
+       cpu_notifier_register_done();
+       rapl_advertise();
+       return 0;
+
+out:
+       pr_warn("Initialization failed (%d), disabled\n", ret);
+       cleanup_rapl_pmus();
+       cpu_notifier_register_done();
+       return ret;
+}
+device_initcall(rapl_pmu_init);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
new file mode 100644 (file)
index 0000000..7012d18
--- /dev/null
@@ -0,0 +1,1412 @@
+#include "uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
+struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
+
+static bool pcidrv_registered;
+struct pci_driver *uncore_pci_driver;
+/* pci bus to socket mapping */
+DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
+struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
+struct pci_extra_dev *uncore_extra_pci_dev;
+static int max_packages;
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint uncore_constraint_fixed =
+       EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+struct event_constraint uncore_constraint_empty =
+       EVENT_CONSTRAINT(0, 0, 0);
+
+static int uncore_pcibus_to_physid(struct pci_bus *bus)
+{
+       struct pci2phy_map *map;
+       int phys_id = -1;
+
+       raw_spin_lock(&pci2phy_map_lock);
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == pci_domain_nr(bus)) {
+                       phys_id = map->pbus_to_physid[bus->number];
+                       break;
+               }
+       }
+       raw_spin_unlock(&pci2phy_map_lock);
+
+       return phys_id;
+}
+
+static void uncore_free_pcibus_map(void)
+{
+       struct pci2phy_map *map, *tmp;
+
+       list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
+               list_del(&map->list);
+               kfree(map);
+       }
+}
+
+struct pci2phy_map *__find_pci2phy_map(int segment)
+{
+       struct pci2phy_map *map, *alloc = NULL;
+       int i;
+
+       lockdep_assert_held(&pci2phy_map_lock);
+
+lookup:
+       list_for_each_entry(map, &pci2phy_map_head, list) {
+               if (map->segment == segment)
+                       goto end;
+       }
+
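+       /*
+        * Not found: drop the lock for the GFP_KERNEL allocation (which may
+        * sleep), then retake it and redo the lookup in case another CPU
+        * added the same segment while the lock was dropped.
+        */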
+       if (!alloc) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
+               raw_spin_lock(&pci2phy_map_lock);
+
+               if (!alloc)
+                       return NULL;
+
+               goto lookup;
+       }
+
+       map = alloc;
+       alloc = NULL;
+       map->segment = segment;
+       for (i = 0; i < 256; i++)
+               map->pbus_to_physid[i] = -1;
+       list_add_tail(&map->list, &pci2phy_map_head);
+
+end:
+       kfree(alloc);
+       return map;
+}
+
+ssize_t uncore_event_show(struct kobject *kobj,
+                         struct kobj_attribute *attr, char *buf)
+{
+       struct uncore_event_desc *event =
+               container_of(attr, struct uncore_event_desc, attr);
+       return sprintf(buf, "%s", event->config);
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+       return pmu->boxes[topology_logical_package_id(cpu)];
+}
+
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       u64 count;
+
+       rdmsrl(event->hw.event_base, count);
+
+       return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ */
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       unsigned long flags;
+       bool ok = false;
+
+       /*
+        * reg->alloc can be set due to existing state, so for fake box we
+        * need to ignore this, otherwise we might fail to allocate proper
+        * fake state for this extra reg constraint.
+        */
+       if (reg1->idx == EXTRA_REG_NONE ||
+           (!uncore_box_is_fake(box) && reg1->alloc))
+               return NULL;
+
+       er = &box->shared_regs[reg1->idx];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (!atomic_read(&er->ref) ||
+           (er->config1 == reg1->config && er->config2 == reg2->config)) {
+               atomic_inc(&er->ref);
+               er->config1 = reg1->config;
+               er->config2 = reg2->config;
+               ok = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (ok) {
+               if (!uncore_box_is_fake(box))
+                       reg1->alloc = 1;
+               return NULL;
+       }
+
+       return &uncore_constraint_empty;
+}
+
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+       /*
+        * Only put constraint if extra reg was actually allocated. Also
+        * takes care of events which do not use an extra shared reg.
+        *
+        * Also, if this is a fake box we shouldn't touch any event state
+        * (reg->alloc) and we don't care about leaving inconsistent box
+        * state either since it will be thrown out.
+        */
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       er = &box->shared_regs[reg1->idx];
+       atomic_dec(&er->ref);
+       reg1->alloc = 0;
+}
+
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       u64 config;
+
+       er = &box->shared_regs[idx];
+
+       raw_spin_lock_irqsave(&er->lock, flags);
+       config = er->config;
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       return config;
+}
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box,
+                                  struct perf_event *event, int idx)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       hwc->idx = idx;
+       hwc->last_tag = ++box->tags[idx];
+
+       if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+               hwc->event_base = uncore_fixed_ctr(box);
+               hwc->config_base = uncore_fixed_ctl(box);
+               return;
+       }
+
+       hwc->config_base = uncore_event_ctl(box, hwc->idx);
+       hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
+}
+
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
+{
+       u64 prev_count, new_count, delta;
+       int shift;
+
+       if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+               shift = 64 - uncore_fixed_ctr_bits(box);
+       else
+               shift = 64 - uncore_perf_ctr_bits(box);
+
+       /* the hrtimer might modify the previous event value */
+again:
+       prev_count = local64_read(&event->hw.prev_count);
+       new_count = uncore_read_counter(box, event);
+       if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+               goto again;
+
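+       /*
+        * Shift both values up to the counter's MSB and back down so the
+        * subtraction is done at the counter's real width; this keeps the
+        * delta correct across a counter wraparound.
+        */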
+       delta = (new_count << shift) - (prev_count << shift);
+       delta >>= shift;
+
+       local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable on SandyBridge-EP and broken on
+ * SandyBridge, so we use an hrtimer to poll the counters periodically
+ * before they overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+       struct intel_uncore_box *box;
+       struct perf_event *event;
+       unsigned long flags;
+       int bit;
+
+       box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+       if (!box->n_active || box->cpu != smp_processor_id())
+               return HRTIMER_NORESTART;
+       /*
+        * Disable local interrupts to prevent uncore_pmu_event_start/stop
+        * from interrupting the update process.
+        */
+       local_irq_save(flags);
+
+       /*
+        * handle boxes with an active event list as opposed to active
+        * counters
+        */
+       list_for_each_entry(event, &box->active_list, active_entry) {
+               uncore_perf_event_update(box, event);
+       }
+
+       for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+               uncore_perf_event_update(box, box->events[bit]);
+
+       local_irq_restore(flags);
+
+       hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
+       return HRTIMER_RESTART;
+}
+
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
+                     HRTIMER_MODE_REL_PINNED);
+}
+
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_cancel(&box->hrtimer);
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+       hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       box->hrtimer.function = uncore_pmu_hrtimer;
+}
+
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
+                                                int node)
+{
+       int i, size, numshared = type->num_shared_regs;
+       struct intel_uncore_box *box;
+
+       size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
+
+       box = kzalloc_node(size, GFP_KERNEL, node);
+       if (!box)
+               return NULL;
+
+       for (i = 0; i < numshared; i++)
+               raw_spin_lock_init(&box->shared_regs[i].lock);
+
+       uncore_pmu_init_hrtimer(box);
+       box->cpu = -1;
+       box->pci_phys_id = -1;
+       box->pkgid = -1;
+
+       /* set default hrtimer timeout */
+       box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
+
+       INIT_LIST_HEAD(&box->active_list);
+
+       return box;
+}
+
+/*
+ * Using uncore_pmu_event_init pmu event_init callback
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+static bool is_uncore_event(struct perf_event *event)
+{
+       return event->pmu->event_init == uncore_pmu_event_init;
+}
+
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
+                     bool dogrp)
+{
+       struct perf_event *event;
+       int n, max_count;
+
+       max_count = box->pmu->type->num_counters;
+       if (box->pmu->type->fixed_ctl)
+               max_count++;
+
+       if (box->n_events >= max_count)
+               return -EINVAL;
+
+       n = box->n_events;
+
+       if (is_uncore_event(leader)) {
+               box->event_list[n] = leader;
+               n++;
+       }
+
+       if (!dogrp)
+               return n;
+
+       list_for_each_entry(event, &leader->sibling_list, group_entry) {
+               if (!is_uncore_event(event) ||
+                   event->state <= PERF_EVENT_STATE_OFF)
+                       continue;
+
+               if (n >= max_count)
+                       return -EINVAL;
+
+               box->event_list[n] = event;
+               n++;
+       }
+       return n;
+}
+
+static struct event_constraint *
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_type *type = box->pmu->type;
+       struct event_constraint *c;
+
+       if (type->ops->get_constraint) {
+               c = type->ops->get_constraint(box, event);
+               if (c)
+                       return c;
+       }
+
+       if (event->attr.config == UNCORE_FIXED_EVENT)
+               return &uncore_constraint_fixed;
+
+       if (type->constraints) {
+               for_each_event_constraint(c, type->constraints) {
+                       if ((event->hw.config & c->cmask) == c->code)
+                               return c;
+               }
+       }
+
+       return &type->unconstrainted;
+}
+
+static void uncore_put_event_constraint(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       if (box->pmu->type->ops->put_constraint)
+               box->pmu->type->ops->put_constraint(box, event);
+}
+
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
+{
+       unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+       struct event_constraint *c;
+       int i, wmin, wmax, ret = 0;
+       struct hw_perf_event *hwc;
+
+       bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+       for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+               c = uncore_get_event_constraint(box, box->event_list[i]);
+               box->event_constraint[i] = c;
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
+       }
+
+       /* fastpath, try to reuse previous register */
+       for (i = 0; i < n; i++) {
+               hwc = &box->event_list[i]->hw;
+               c = box->event_constraint[i];
+
+               /* never assigned */
+               if (hwc->idx == -1)
+                       break;
+
+               /* constraint still honored */
+               if (!test_bit(hwc->idx, c->idxmsk))
+                       break;
+
+               /* not already used */
+               if (test_bit(hwc->idx, used_mask))
+                       break;
+
+               __set_bit(hwc->idx, used_mask);
+               if (assign)
+                       assign[i] = hwc->idx;
+       }
+       /* slow path */
+       if (i != n)
+               ret = perf_assign_events(box->event_constraint, n,
+                                        wmin, wmax, n, assign);
+
+       if (!assign || ret) {
+               for (i = 0; i < n; i++)
+                       uncore_put_event_constraint(box, box->event_list[i]);
+       }
+       return ret ? -EINVAL : 0;
+}
+
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int idx = event->hw.idx;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+               return;
+
+       event->hw.state = 0;
+       box->events[idx] = event;
+       box->n_active++;
+       __set_bit(idx, box->active_mask);
+
+       local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+       uncore_enable_event(box, event);
+
+       if (box->n_active == 1) {
+               uncore_enable_box(box);
+               uncore_pmu_start_hrtimer(box);
+       }
+}
+
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+               uncore_disable_event(box, event);
+               box->n_active--;
+               box->events[hwc->idx] = NULL;
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+
+               if (box->n_active == 0) {
+                       uncore_disable_box(box);
+                       uncore_pmu_cancel_hrtimer(box);
+               }
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               uncore_perf_event_update(box, event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+       int assign[UNCORE_PMC_IDX_MAX];
+       int i, n, ret;
+
+       if (!box)
+               return -ENODEV;
+
+       ret = n = uncore_collect_events(box, event, false);
+       if (ret < 0)
+               return ret;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       ret = uncore_assign_events(box, assign, n);
+       if (ret)
+               return ret;
+
+       /* save events moving to new counters */
+       for (i = 0; i < box->n_events; i++) {
+               event = box->event_list[i];
+               hwc = &event->hw;
+
+               if (hwc->idx == assign[i] &&
+                       hwc->last_tag == box->tags[assign[i]])
+                       continue;
+               /*
+                * Ensure we don't accidentally enable a stopped
+                * counter simply because we rescheduled.
+                */
+               if (hwc->state & PERF_HES_STOPPED)
+                       hwc->state |= PERF_HES_ARCH;
+
+               uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+       }
+
+       /* reprogram moved events into new counters */
+       for (i = 0; i < n; i++) {
+               event = box->event_list[i];
+               hwc = &event->hw;
+
+               if (hwc->idx != assign[i] ||
+                       hwc->last_tag != box->tags[assign[i]])
+                       uncore_assign_hw_event(box, event, assign[i]);
+               else if (i < box->n_events)
+                       continue;
+
+               if (hwc->state & PERF_HES_ARCH)
+                       continue;
+
+               uncore_pmu_event_start(event, 0);
+       }
+       box->n_events = n;
+
+       return 0;
+}
+
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int i;
+
+       uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < box->n_events; i++) {
+               if (event == box->event_list[i]) {
+                       uncore_put_event_constraint(box, event);
+
+                       for (++i; i < box->n_events; i++)
+                               box->event_list[i - 1] = box->event_list[i];
+
+                       --box->n_events;
+                       break;
+               }
+       }
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+}
+
+void uncore_pmu_event_read(struct perf_event *event)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       uncore_perf_event_update(box, event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+                               struct perf_event *event)
+{
+       struct perf_event *leader = event->group_leader;
+       struct intel_uncore_box *fake_box;
+       int ret = -EINVAL, n;
+
+       fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
+       if (!fake_box)
+               return -ENOMEM;
+
+       fake_box->pmu = pmu;
+       /*
+        * The event is not yet connected with its siblings, therefore we
+        * must first collect the existing siblings, then add the new event
+        * before we can simulate the scheduling.
+        */
+       n = uncore_collect_events(fake_box, leader, true);
+       if (n < 0)
+               goto out;
+
+       fake_box->n_events = n;
+       n = uncore_collect_events(fake_box, event, false);
+       if (n < 0)
+               goto out;
+
+       fake_box->n_events = n;
+
+       ret = uncore_assign_events(fake_box, NULL, n);
+out:
+       kfree(fake_box);
+       return ret;
+}
+
+static int uncore_pmu_event_init(struct perf_event *event)
+{
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       struct hw_perf_event *hwc = &event->hw;
+       int ret;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       pmu = uncore_event_to_pmu(event);
+       /* no device found for this pmu */
+       if (pmu->func_id < 0)
+               return -ENOENT;
+
+       /*
+        * The uncore PMU measures at all privilege levels all the time,
+        * so it doesn't make sense to specify any exclude bits.
+        */
+       if (event->attr.exclude_user || event->attr.exclude_kernel ||
+                       event->attr.exclude_hv || event->attr.exclude_idle)
+               return -EINVAL;
+
+       /* Sampling not supported yet */
+       if (hwc->sample_period)
+               return -EINVAL;
+
+       /*
+        * Place all uncore events for a particular physical package
+        * onto a single cpu
+        */
+       if (event->cpu < 0)
+               return -EINVAL;
+       box = uncore_pmu_to_box(pmu, event->cpu);
+       if (!box || box->cpu < 0)
+               return -EINVAL;
+       event->cpu = box->cpu;
+       event->pmu_private = box;
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
+       if (event->attr.config == UNCORE_FIXED_EVENT) {
+               /* no fixed counter */
+               if (!pmu->type->fixed_ctl)
+                       return -EINVAL;
+               /*
+                * if there is only one fixed counter, only the first pmu
+                * can access the fixed counter
+                */
+               if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+                       return -EINVAL;
+
+               /* fixed counters have event field hardcoded to zero */
+               hwc->config = 0ULL;
+       } else {
+               hwc->config = event->attr.config & pmu->type->event_mask;
+               if (pmu->type->ops->hw_config) {
+                       ret = pmu->type->ops->hw_config(box, event);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       if (event->group_leader != event)
+               ret = uncore_validate_group(pmu, event);
+       else
+               ret = 0;
+
+       return ret;
+}
+
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+       &dev_attr_cpumask.attr,
+       NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+       .attrs = uncore_pmu_attrs,
+};
+
+static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+       int ret;
+
+       if (!pmu->type->pmu) {
+               pmu->pmu = (struct pmu) {
+                       .attr_groups    = pmu->type->attr_groups,
+                       .task_ctx_nr    = perf_invalid_context,
+                       .event_init     = uncore_pmu_event_init,
+                       .add            = uncore_pmu_event_add,
+                       .del            = uncore_pmu_event_del,
+                       .start          = uncore_pmu_event_start,
+                       .stop           = uncore_pmu_event_stop,
+                       .read           = uncore_pmu_event_read,
+               };
+       } else {
+               pmu->pmu = *pmu->type->pmu;
+               pmu->pmu.attr_groups = pmu->type->attr_groups;
+       }
+
+       if (pmu->type->num_boxes == 1) {
+               if (strlen(pmu->type->name) > 0)
+                       sprintf(pmu->name, "uncore_%s", pmu->type->name);
+               else
+                       sprintf(pmu->name, "uncore");
+       } else {
+               sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
+                       pmu->pmu_idx);
+       }
+
+       ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+       if (!ret)
+               pmu->registered = true;
+       return ret;
+}
+
+static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
+{
+       if (!pmu->registered)
+               return;
+       perf_pmu_unregister(&pmu->pmu);
+       pmu->registered = false;
+}
+
+static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       if (pmu) {
+               pkg = topology_physical_package_id(cpu);
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (box)
+                               uncore_box_exit(box);
+               }
+       }
+}
+
+static void __init uncore_exit_boxes(void *dummy)
+{
+       struct intel_uncore_type **types;
+
+       for (types = uncore_msr_uncores; *types; types++)
+               __uncore_exit_boxes(*types, smp_processor_id());
+}
+
+static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
+{
+       int pkg;
+
+       for (pkg = 0; pkg < max_packages; pkg++)
+               kfree(pmu->boxes[pkg]);
+       kfree(pmu->boxes);
+}
+
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       int i;
+
+       if (pmu) {
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       uncore_pmu_unregister(pmu);
+                       uncore_free_boxes(pmu);
+               }
+               kfree(type->pmus);
+               type->pmus = NULL;
+       }
+       kfree(type->events_group);
+       type->events_group = NULL;
+}
+
+static void __init uncore_types_exit(struct intel_uncore_type **types)
+{
+       for (; *types; types++)
+               uncore_type_exit(*types);
+}
+
+static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
+{
+       struct intel_uncore_pmu *pmus;
+       struct attribute_group *attr_group;
+       struct attribute **attrs;
+       size_t size;
+       int i, j;
+
+       pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+       if (!pmus)
+               return -ENOMEM;
+
+       size = max_packages * sizeof(struct intel_uncore_box *);
+
+       for (i = 0; i < type->num_boxes; i++) {
+               pmus[i].func_id = setid ? i : -1;
+               pmus[i].pmu_idx = i;
+               pmus[i].type    = type;
+               pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
+               if (!pmus[i].boxes)
+                       return -ENOMEM;
+       }
+
+       type->pmus = pmus;
+       type->unconstrainted = (struct event_constraint)
+               __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+                               0, type->num_counters, 0, 0);
+
+       if (type->event_descs) {
+               for (i = 0; type->event_descs[i].attr.attr.name; i++);
+
+               attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+                                       sizeof(*attr_group), GFP_KERNEL);
+               if (!attr_group)
+                       return -ENOMEM;
+
+               attrs = (struct attribute **)(attr_group + 1);
+               attr_group->name = "events";
+               attr_group->attrs = attrs;
+
+               for (j = 0; j < i; j++)
+                       attrs[j] = &type->event_descs[j].attr.attr;
+
+               type->events_group = attr_group;
+       }
+
+       type->pmu_group = &uncore_pmu_attr_group;
+       return 0;
+}
+
+static int __init
+uncore_types_init(struct intel_uncore_type **types, bool setid)
+{
+       int ret;
+
+       for (; *types; types++) {
+               ret = uncore_type_init(*types, setid);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+/*
+ * add a pci uncore device
+ */
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct intel_uncore_type *type;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int phys_id, pkg, ret;
+
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
+       if (phys_id < 0)
+               return -ENODEV;
+
+       pkg = topology_phys_to_logical_pkg(phys_id);
+       if (WARN_ON_ONCE(pkg < 0))
+               return -EINVAL;
+
+       if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+               int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
+
+               uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+               pci_set_drvdata(pdev, NULL);
+               return 0;
+       }
+
+       type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+       /*
+        * For a performance monitoring unit with multiple boxes,
+        * each box has a different function id.
+        */
+       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+       /*
+        * Knights Landing uses a common PCI device ID for multiple instances
+        * of an uncore PMU device type. There is only one entry per device
+        * type in the knl_uncore_pci_ids table in spite of multiple devices
+        * being present for some device types, so the PCI device idx is 0
+        * for all devices. Hence increment the pmu pointer to point to an
+        * unused array element.
+        */
+       if (boot_cpu_data.x86_model == 87) {
+               while (pmu->func_id >= 0)
+                       pmu++;
+       }
+
+       if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+               return -EINVAL;
+
+       box = uncore_alloc_box(type, NUMA_NO_NODE);
+       if (!box)
+               return -ENOMEM;
+
+       if (pmu->func_id < 0)
+               pmu->func_id = pdev->devfn;
+       else
+               WARN_ON_ONCE(pmu->func_id != pdev->devfn);
+
+       atomic_inc(&box->refcnt);
+       box->pci_phys_id = phys_id;
+       box->pkgid = pkg;
+       box->pci_dev = pdev;
+       box->pmu = pmu;
+       uncore_box_init(box);
+       pci_set_drvdata(pdev, box);
+
+       pmu->boxes[pkg] = box;
+       if (atomic_inc_return(&pmu->activeboxes) > 1)
+               return 0;
+
+       /* First active box registers the pmu */
+       ret = uncore_pmu_register(pmu);
+       if (ret) {
+               pci_set_drvdata(pdev, NULL);
+               pmu->boxes[pkg] = NULL;
+               uncore_box_exit(box);
+               kfree(box);
+       }
+       return ret;
+}
+
+static void uncore_pci_remove(struct pci_dev *pdev)
+{
+       struct intel_uncore_box *box = pci_get_drvdata(pdev);
+       struct intel_uncore_pmu *pmu;
+       int i, phys_id, pkg;
+
+       phys_id = uncore_pcibus_to_physid(pdev->bus);
+       pkg = topology_phys_to_logical_pkg(phys_id);
+
+       box = pci_get_drvdata(pdev);
+       if (!box) {
+               for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+                       if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
+                               uncore_extra_pci_dev[pkg].dev[i] = NULL;
+                               break;
+                       }
+               }
+               WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+               return;
+       }
+
+       pmu = box->pmu;
+       if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
+               return;
+
+       pci_set_drvdata(pdev, NULL);
+       pmu->boxes[pkg] = NULL;
+       if (atomic_dec_return(&pmu->activeboxes) == 0)
+               uncore_pmu_unregister(pmu);
+       uncore_box_exit(box);
+       kfree(box);
+}
+
+static int __init uncore_pci_init(void)
+{
+       size_t size;
+       int ret;
+
+       switch (boot_cpu_data.x86_model) {
+       case 45: /* Sandy Bridge-EP */
+               ret = snbep_uncore_pci_init();
+               break;
+       case 62: /* Ivy Bridge-EP */
+               ret = ivbep_uncore_pci_init();
+               break;
+       case 63: /* Haswell-EP */
+               ret = hswep_uncore_pci_init();
+               break;
+       case 79: /* BDX-EP */
+       case 86: /* BDX-DE */
+               ret = bdx_uncore_pci_init();
+               break;
+       case 42: /* Sandy Bridge */
+               ret = snb_uncore_pci_init();
+               break;
+       case 58: /* Ivy Bridge */
+               ret = ivb_uncore_pci_init();
+               break;
+       case 60: /* Haswell */
+       case 69: /* Haswell Celeron */
+               ret = hsw_uncore_pci_init();
+               break;
+       case 61: /* Broadwell */
+               ret = bdw_uncore_pci_init();
+               break;
+       case 87: /* Knights Landing */
+               ret = knl_uncore_pci_init();
+               break;
+       case 94: /* SkyLake */
+               ret = skl_uncore_pci_init();
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       if (ret)
+               return ret;
+
+       size = max_packages * sizeof(struct pci_extra_dev);
+       uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
+       if (!uncore_extra_pci_dev) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ret = uncore_types_init(uncore_pci_uncores, false);
+       if (ret)
+               goto errtype;
+
+       uncore_pci_driver->probe = uncore_pci_probe;
+       uncore_pci_driver->remove = uncore_pci_remove;
+
+       ret = pci_register_driver(uncore_pci_driver);
+       if (ret)
+               goto errtype;
+
+       pcidrv_registered = true;
+       return 0;
+
+errtype:
+       uncore_types_exit(uncore_pci_uncores);
+       kfree(uncore_extra_pci_dev);
+       uncore_extra_pci_dev = NULL;
+       uncore_free_pcibus_map();
+err:
+       uncore_pci_uncores = empty_uncore;
+       return ret;
+}
+
+static void __init uncore_pci_exit(void)
+{
+       if (pcidrv_registered) {
+               pcidrv_registered = false;
+               pci_unregister_driver(uncore_pci_driver);
+               uncore_types_exit(uncore_pci_uncores);
+               kfree(uncore_extra_pci_dev);
+               uncore_free_pcibus_map();
+       }
+}
+
+static void uncore_cpu_dying(int cpu)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (box && atomic_dec_return(&box->refcnt) == 0)
+                               uncore_box_exit(box);
+               }
+       }
+}
+
+static void uncore_cpu_starting(int cpu, bool init)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg, ncpus = 1;
+
+       if (init) {
+               /*
+                * On init we get the number of online cpus in the package
+                * and set refcount for all of them.
+                */
+               ncpus = cpumask_weight(topology_core_cpumask(cpu));
+       }
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       box = pmu->boxes[pkg];
+                       if (!box)
+                               continue;
+                       /* The first cpu on a package activates the box */
+                       if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
+                               uncore_box_init(box);
+               }
+       }
+}
+
+static int uncore_cpu_prepare(int cpu)
+{
+       struct intel_uncore_type *type, **types = uncore_msr_uncores;
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(cpu);
+       for (; *types; types++) {
+               type = *types;
+               pmu = type->pmus;
+               for (i = 0; i < type->num_boxes; i++, pmu++) {
+                       if (pmu->boxes[pkg])
+                               continue;
+                       /* First cpu of a package allocates the box */
+                       box = uncore_alloc_box(type, cpu_to_node(cpu));
+                       if (!box)
+                               return -ENOMEM;
+                       box->pmu = pmu;
+                       box->pkgid = pkg;
+                       pmu->boxes[pkg] = box;
+               }
+       }
+       return 0;
+}
+
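+/*
+ * Move the uncore context of one package from @old_cpu to @new_cpu. With
+ * old_cpu == -1 the package's boxes are claimed by the new cpu; with
+ * new_cpu == -1 they are orphaned until another cpu in the package takes
+ * over.
+ */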
+static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
+                                  int new_cpu)
+{
+       struct intel_uncore_pmu *pmu = type->pmus;
+       struct intel_uncore_box *box;
+       int i, pkg;
+
+       pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+       for (i = 0; i < type->num_boxes; i++, pmu++) {
+               box = pmu->boxes[pkg];
+               if (!box)
+                       continue;
+
+               if (old_cpu < 0) {
+                       WARN_ON_ONCE(box->cpu != -1);
+                       box->cpu = new_cpu;
+                       continue;
+               }
+
+               WARN_ON_ONCE(box->cpu != old_cpu);
+               box->cpu = -1;
+               if (new_cpu < 0)
+                       continue;
+
+               uncore_pmu_cancel_hrtimer(box);
+               perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
+               box->cpu = new_cpu;
+       }
+}
+
+static void uncore_change_context(struct intel_uncore_type **uncores,
+                                 int old_cpu, int new_cpu)
+{
+       for (; *uncores; uncores++)
+               uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
+}
+
+static void uncore_event_exit_cpu(int cpu)
+{
+       int target;
+
+       /* Check if exiting cpu is used for collecting uncore events */
+       if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+               return;
+
+       /* Find a new cpu to collect uncore events */
+       target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+
+       /* Migrate uncore events to the new target */
+       if (target < nr_cpu_ids)
+               cpumask_set_cpu(target, &uncore_cpu_mask);
+       else
+               target = -1;
+
+       uncore_change_context(uncore_msr_uncores, cpu, target);
+       uncore_change_context(uncore_pci_uncores, cpu, target);
+}
+
+static void uncore_event_init_cpu(int cpu)
+{
+       int target;
+
+       /*
+        * Check if there is an online cpu in the package
+        * which collects uncore events already.
+        */
+       target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+       if (target < nr_cpu_ids)
+               return;
+
+       cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+       uncore_change_context(uncore_msr_uncores, -1, cpu);
+       uncore_change_context(uncore_pci_uncores, -1, cpu);
+}
+
+static int uncore_cpu_notifier(struct notifier_block *self,
+                              unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (long)hcpu;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_UP_PREPARE:
+               return notifier_from_errno(uncore_cpu_prepare(cpu));
+
+       case CPU_STARTING:
+               uncore_cpu_starting(cpu, false);
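+               /* Fall through: the starting cpu may also start collecting events */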
+       case CPU_DOWN_FAILED:
+               uncore_event_init_cpu(cpu);
+               break;
+
+       case CPU_UP_CANCELED:
+       case CPU_DYING:
+               uncore_cpu_dying(cpu);
+               break;
+
+       case CPU_DOWN_PREPARE:
+               uncore_event_exit_cpu(cpu);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_cpu_nb = {
+       .notifier_call  = uncore_cpu_notifier,
+       /*
+        * to migrate uncore events, our notifier should be executed
+        * before perf core's notifier.
+        */
+       .priority       = CPU_PRI_PERF + 1,
+};
+
+static int __init type_pmu_register(struct intel_uncore_type *type)
+{
+       int i, ret;
+
+       for (i = 0; i < type->num_boxes; i++) {
+               ret = uncore_pmu_register(&type->pmus[i]);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int __init uncore_msr_pmus_register(void)
+{
+       struct intel_uncore_type **types = uncore_msr_uncores;
+       int ret;
+
+       for (; *types; types++) {
+               ret = type_pmu_register(*types);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+static int __init uncore_cpu_init(void)
+{
+       int ret;
+
+       switch (boot_cpu_data.x86_model) {
+       case 26: /* Nehalem */
+       case 30:
+       case 37: /* Westmere */
+       case 44:
+               nhm_uncore_cpu_init();
+               break;
+       case 42: /* Sandy Bridge */
+       case 58: /* Ivy Bridge */
+       case 60: /* Haswell */
+       case 69: /* Haswell */
+       case 70: /* Haswell */
+       case 61: /* Broadwell */
+       case 71: /* Broadwell */
+               snb_uncore_cpu_init();
+               break;
+       case 45: /* Sandy Bridge-EP */
+               snbep_uncore_cpu_init();
+               break;
+       case 46: /* Nehalem-EX */
+       case 47: /* Westmere-EX aka. Xeon E7 */
+               nhmex_uncore_cpu_init();
+               break;
+       case 62: /* Ivy Bridge-EP */
+               ivbep_uncore_cpu_init();
+               break;
+       case 63: /* Haswell-EP */
+               hswep_uncore_cpu_init();
+               break;
+       case 79: /* BDX-EP */
+       case 86: /* BDX-DE */
+               bdx_uncore_cpu_init();
+               break;
+       case 87: /* Knights Landing */
+               knl_uncore_cpu_init();
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       ret = uncore_types_init(uncore_msr_uncores, true);
+       if (ret)
+               goto err;
+
+       ret = uncore_msr_pmus_register();
+       if (ret)
+               goto err;
+       return 0;
+err:
+       uncore_types_exit(uncore_msr_uncores);
+       uncore_msr_uncores = empty_uncore;
+       return ret;
+}
+
+static void __init uncore_cpu_setup(void *dummy)
+{
+       uncore_cpu_starting(smp_processor_id(), true);
+}
+
+/* Statically sized bitmap to avoid a tiny runtime allocation for the normal case */
+static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
+
+static int __init uncore_cpumask_init(bool msr)
+{
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu) {
+               unsigned int pkg = topology_logical_package_id(cpu);
+               int ret;
+
+               if (test_and_set_bit(pkg, packages))
+                       continue;
+               /*
+                * The first online cpu of each package allocates and takes
+                * the refcounts for all other online cpus in that package.
+                * If msrs are not enabled no allocation is required.
+                */
+               if (msr) {
+                       ret = uncore_cpu_prepare(cpu);
+                       if (ret)
+                               return ret;
+               }
+               uncore_event_init_cpu(cpu);
+               smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
+       }
+       __register_cpu_notifier(&uncore_cpu_nb);
+       return 0;
+}
+
+static int __init intel_uncore_init(void)
+{
+       int pret, cret, ret;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return -ENODEV;
+
+       if (cpu_has_hypervisor)
+               return -ENODEV;
+
+       max_packages = topology_max_packages();
+
+       pret = uncore_pci_init();
+       cret = uncore_cpu_init();
+
+       if (cret && pret)
+               return -ENODEV;
+
+       cpu_notifier_register_begin();
+       ret = uncore_cpumask_init(!cret);
+       if (ret)
+               goto err;
+       cpu_notifier_register_done();
+       return 0;
+
+err:
+       /* Undo box->init_box() */
+       on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
+       uncore_types_exit(uncore_msr_uncores);
+       uncore_pci_exit();
+       cpu_notifier_register_done();
+       return ret;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
new file mode 100644 (file)
index 0000000..79766b9
--- /dev/null
@@ -0,0 +1,378 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <asm/apicdef.h>
+
+#include <linux/perf_event.h>
+#include "../perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN            32
+#define UNCORE_PMU_HRTIMER_INTERVAL    (60LL * NSEC_PER_SEC)
+#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT             0xff
+#define UNCORE_PMC_IDX_MAX_GENERIC     8
+#define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
+
+#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV           0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX       3
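+
+/*
+ * For example, UNCORE_PCI_DEV_DATA(2, 3) packs to 0x203;
+ * UNCORE_PCI_DEV_TYPE(0x203) and UNCORE_PCI_DEV_IDX(0x203) recover 2 and 3.
+ */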
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+struct pci_extra_dev {
+       struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
+};
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+struct intel_uncore_type {
+       const char *name;
+       int num_counters;
+       int num_boxes;
+       int perf_ctr_bits;
+       int fixed_ctr_bits;
+       unsigned perf_ctr;
+       unsigned event_ctl;
+       unsigned event_mask;
+       unsigned fixed_ctr;
+       unsigned fixed_ctl;
+       unsigned box_ctl;
+       unsigned msr_offset;
+       unsigned num_shared_regs:8;
+       unsigned single_fixed:1;
+       unsigned pair_ctr_ctl:1;
+       unsigned *msr_offsets;
+       struct event_constraint unconstrainted;
+       struct event_constraint *constraints;
+       struct intel_uncore_pmu *pmus;
+       struct intel_uncore_ops *ops;
+       struct uncore_event_desc *event_descs;
+       const struct attribute_group *attr_groups[4];
+       struct pmu *pmu; /* for custom pmu ops */
+};
+
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
+
+struct intel_uncore_ops {
+       void (*init_box)(struct intel_uncore_box *);
+       void (*exit_box)(struct intel_uncore_box *);
+       void (*disable_box)(struct intel_uncore_box *);
+       void (*enable_box)(struct intel_uncore_box *);
+       void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+       void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+       u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+       int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+       struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+                                                  struct perf_event *);
+       void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
+};
+
+struct intel_uncore_pmu {
+       struct pmu                      pmu;
+       char                            name[UNCORE_PMU_NAME_LEN];
+       int                             pmu_idx;
+       int                             func_id;
+       bool                            registered;
+       atomic_t                        activeboxes;
+       struct intel_uncore_type        *type;
+       struct intel_uncore_box         **boxes;
+};
+
+struct intel_uncore_extra_reg {
+       raw_spinlock_t lock;
+       u64 config, config1, config2;
+       atomic_t ref;
+};
+
+struct intel_uncore_box {
+       int pci_phys_id;
+       int pkgid;
+       int n_active;   /* number of active events */
+       int n_events;
+       int cpu;        /* cpu to collect events */
+       unsigned long flags;
+       atomic_t refcnt;
+       struct perf_event *events[UNCORE_PMC_IDX_MAX];
+       struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+       struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
+       unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+       u64 tags[UNCORE_PMC_IDX_MAX];
+       struct pci_dev *pci_dev;
+       struct intel_uncore_pmu *pmu;
+       u64 hrtimer_duration; /* hrtimer timeout for this box */
+       struct hrtimer hrtimer;
+       struct list_head list;
+       struct list_head active_list;
+       void *io_addr;
+       struct intel_uncore_extra_reg shared_regs[0];
+};
+
+#define UNCORE_BOX_FLAG_INITIATED      0
+
+struct uncore_event_desc {
+       struct kobj_attribute attr;
+       const char *config;
+};
+
+struct pci2phy_map {
+       struct list_head list;
+       int segment;
+       int pbus_to_physid[256];
+};
+
+struct pci2phy_map *__find_pci2phy_map(int segment);
+
+ssize_t uncore_event_show(struct kobject *kobj,
+                         struct kobj_attribute *attr, char *buf);
+
+#define INTEL_UNCORE_EVENT_DESC(_name, _config)                        \
+{                                                              \
+       .attr   = __ATTR(_name, 0444, uncore_event_show, NULL), \
+       .config = _config,                                      \
+}
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                        \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj,            \
+                               struct kobj_attribute *attr,            \
+                               char *page)                             \
+{                                                                      \
+       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
+       return sprintf(page, _format "\n");                             \
+}                                                                      \
+static struct kobj_attribute format_attr_##_var =                      \
+       __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
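+
+/*
+ * Typical usage, as in the uncore drivers below:
+ *   DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+ * creates format_attr_event, whose sysfs "format/event" file prints
+ * "config:0-7".
+ */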
+
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+       return box->pmu->type->box_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr;
+}
+
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       return idx * 4 + box->pmu->type->event_ctl;
+}
+
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       return idx * 8 + box->pmu->type->perf_ctr;
+}
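+
+/*
+ * PCI-based boxes lay out their registers at fixed strides: event control
+ * registers are 4 bytes apart and counters are 8 bytes apart, hence the
+ * idx * 4 and idx * 8 above.
+ */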
+
+static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
+{
+       struct intel_uncore_pmu *pmu = box->pmu;
+       return pmu->type->msr_offsets ?
+               pmu->type->msr_offsets[pmu->pmu_idx] :
+               pmu->type->msr_offset * pmu->pmu_idx;
+}
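+
+/*
+ * Types with irregular per-box spacing (e.g. the NHM-EX cbox) supply an
+ * msr_offsets[] array; otherwise the box offset is the fixed msr_offset
+ * stride times the box index.
+ */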
+
+static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+       if (!box->pmu->type->box_ctl)
+               return 0;
+       return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+       if (!box->pmu->type->fixed_ctl)
+               return 0;
+       return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
+}
+
+static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       return box->pmu->type->event_ctl +
+               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+               uncore_msr_box_offset(box);
+}
+
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       return box->pmu->type->perf_ctr +
+               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+               uncore_msr_box_offset(box);
+}
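+
+/*
+ * With pair_ctr_ctl set (as on NHM-EX/WSM-EX), control and counter MSRs
+ * are interleaved, so both are spaced 2 * idx apart; otherwise they are
+ * contiguous and spaced idx apart.
+ */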
+
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+       if (box->pci_dev)
+               return uncore_pci_fixed_ctl(box);
+       else
+               return uncore_msr_fixed_ctl(box);
+}
+
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+       if (box->pci_dev)
+               return uncore_pci_fixed_ctr(box);
+       else
+               return uncore_msr_fixed_ctr(box);
+}
+
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+       if (box->pci_dev)
+               return uncore_pci_event_ctl(box, idx);
+       else
+               return uncore_msr_event_ctl(box, idx);
+}
+
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+       if (box->pci_dev)
+               return uncore_pci_perf_ctr(box, idx);
+       else
+               return uncore_msr_perf_ctr(box, idx);
+}
+
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+       return box->pmu->type->perf_ctr_bits;
+}
+
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+       return box->pmu->type->fixed_ctr_bits;
+}
+
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+       return box->pmu->type->num_counters;
+}
+
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->type->ops->disable_box)
+               box->pmu->type->ops->disable_box(box);
+}
+
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->type->ops->enable_box)
+               box->pmu->type->ops->enable_box(box);
+}
+
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       box->pmu->type->ops->disable_event(box, event);
+}
+
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       box->pmu->type->ops->enable_event(box, event);
+}
+
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+                               struct perf_event *event)
+{
+       return box->pmu->type->ops->read_counter(box, event);
+}
+
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+               if (box->pmu->type->ops->init_box)
+                       box->pmu->type->ops->init_box(box);
+       }
+}
+
+static inline void uncore_box_exit(struct intel_uncore_box *box)
+{
+       if (test_and_clear_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+               if (box->pmu->type->ops->exit_box)
+                       box->pmu->type->ops->exit_box(box);
+       }
+}
+
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+       return (box->pkgid < 0);
+}
+
+static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+       return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+       return event->pmu_private;
+}
+
+struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
+u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_read(struct perf_event *event);
+void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
+struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
+void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
+u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
+
+extern struct intel_uncore_type **uncore_msr_uncores;
+extern struct intel_uncore_type **uncore_pci_uncores;
+extern struct pci_driver *uncore_pci_driver;
+extern raw_spinlock_t pci2phy_map_lock;
+extern struct list_head pci2phy_map_head;
+extern struct pci_extra_dev *uncore_extra_pci_dev;
+extern struct event_constraint uncore_constraint_empty;
+
+/* perf_event_intel_uncore_snb.c */
+int snb_uncore_pci_init(void);
+int ivb_uncore_pci_init(void);
+int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
+int skl_uncore_pci_init(void);
+void snb_uncore_cpu_init(void);
+void nhm_uncore_cpu_init(void);
+int snb_pci2phy_map_init(int devid);
+
+/* perf_event_intel_uncore_snbep.c */
+int snbep_uncore_pci_init(void);
+void snbep_uncore_cpu_init(void);
+int ivbep_uncore_pci_init(void);
+void ivbep_uncore_cpu_init(void);
+int hswep_uncore_pci_init(void);
+void hswep_uncore_cpu_init(void);
+int bdx_uncore_pci_init(void);
+void bdx_uncore_cpu_init(void);
+int knl_uncore_pci_init(void);
+void knl_uncore_cpu_init(void);
+
+/* perf_event_intel_uncore_nhmex.c */
+void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
new file mode 100644 (file)
index 0000000..cda5693
--- /dev/null
@@ -0,0 +1,1227 @@
+/* Nehalem-EX/Westmere-EX uncore support */
+#include "uncore.h"
+
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK     0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK      0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0         (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET                (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN          (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22                (1 << 22)
+#define NHMEX_PMON_CTL_INVERT          (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK      0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK      (NHMEX_PMON_CTL_EV_SEL_MASK | \
+                                        NHMEX_PMON_CTL_UMASK_MASK | \
+                                        NHMEX_PMON_CTL_EDGE_DET | \
+                                        NHMEX_PMON_CTL_INVERT | \
+                                        NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL            0xc00
+#define NHMEX_U_MSR_PMON_CTR                   0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL                        0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN                 (1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL       0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL             (1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL            (1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK            \
+               (NHMEX_PMON_CTL_EV_SEL_MASK |   \
+                NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL           0xd00
+#define NHMEX_C0_MSR_PMON_CTR0                 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0              0xd10
+#define NHMEX_C_MSR_OFFSET                     0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL           0xc20
+#define NHMEX_B0_MSR_PMON_CTR0                 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0                 0xc30
+#define NHMEX_B_MSR_OFFSET                     0x40
+#define NHMEX_B0_MSR_MATCH                     0xe45
+#define NHMEX_B0_MSR_MASK                      0xe46
+#define NHMEX_B1_MSR_MATCH                     0xe4d
+#define NHMEX_B1_MSR_MASK                      0xe4e
+
+#define NHMEX_B_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT          1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK           \
+               (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT         6
+#define NHMEX_B_PMON_CTR_MASK          \
+               (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK            \
+               (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+                NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL           0xc40
+#define NHMEX_S0_MSR_PMON_CTR0                 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0                 0xc50
+#define NHMEX_S_MSR_OFFSET                     0x80
+#define NHMEX_S0_MSR_MM_CFG                    0xe48
+#define NHMEX_S0_MSR_MATCH                     0xe49
+#define NHMEX_S0_MSR_MASK                      0xe4a
+#define NHMEX_S1_MSR_MM_CFG                    0xe58
+#define NHMEX_S1_MSR_MATCH                     0xe59
+#define NHMEX_S1_MSR_MASK                      0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN                 (0x1ULL << 63)
+#define NHMEX_S_EVENT_TO_R_PROG_EV             0
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL                        0xca0
+#define NHMEX_M0_MSR_PMU_DSP                   0xca5
+#define NHMEX_M0_MSR_PMU_ISS                   0xca6
+#define NHMEX_M0_MSR_PMU_MAP                   0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR               0xca8
+#define NHMEX_M0_MSR_PMU_PGT                   0xca9
+#define NHMEX_M0_MSR_PMU_PLD                   0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC           0xcab
+#define NHMEX_M0_MSR_PMU_CTL0                  0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0                  0xcb1
+#define NHMEX_M_MSR_OFFSET                     0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG                        0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG                        0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN                 (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK           0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK            0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT           34
+
+#define NHMEX_M_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN                        (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT      2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK       \
+       (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT    4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK     \
+       (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE             (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE             (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT         9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK          \
+       (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT    19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK     \
+       (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK                    \
+               (NHMEX_M_PMON_CTL_COUNT_MODE_MASK |     \
+                NHMEX_M_PMON_CTL_STORAGE_MODE_MASK |   \
+                NHMEX_M_PMON_CTL_WRAP_MODE |           \
+                NHMEX_M_PMON_CTL_FLAG_MODE |           \
+                NHMEX_M_PMON_CTL_INC_SEL_MASK |        \
+                NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 11) - 1) | (1 << 23))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
+
+#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 12) - 1) | (1 << 24))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
+
+/*
+ * If the 7th bit (flag mode) is not set, bits 9~13 select the event;
+ * otherwise bits 19~21 select the event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+                               NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+                          NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+                               NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+               EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+                               MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+               EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+                               MBOX_SET_FLAG_SEL_MASK, \
+                               (u64)-1, NHMEX_M_##r)
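+
+/*
+ * For example, MBOX_INC_SEL(0xd) places 0xd in bits 9~13 (0x1a00), while
+ * MBOX_SET_FLAG_SEL(0x5) places 0x5 in bits 19~21 and sets the flag-mode
+ * bit 7 (0x280080).
+ */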
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL                 0xe00
+#define NHMEX_R_MSR_PMON_CTL0                  0xe10
+#define NHMEX_R_MSR_PMON_CNT0                  0xe11
+#define NHMEX_R_MSR_OFFSET                     0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n)           \
+               ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n)                (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n)                (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n)                \
+               (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n)   \
+               (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n)    \
+               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n)     \
+               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n)   \
+               (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n)    \
+               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n)     \
+               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN                    (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT          1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK           \
+               (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN                        (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK            NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL                 0xc80
+#define NHMEX_W_MSR_PMON_CNT0                  0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0              0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR             0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL             0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN           (1ULL << 31)
+
+#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
+                               ((1ULL << (n)) - 1)))
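+/* e.g. __BITS_VALUE(0x11223344, 1, 8) extracts the second 8-bit field: 0x33 */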
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       u64 config;
+
+       if (msr) {
+               rdmsrl(msr, config);
+               config &= ~((1ULL << uncore_num_counters(box)) - 1);
+               /* WBox has a fixed counter */
+               if (uncore_msr_fixed_ctl(box))
+                       config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+               wrmsrl(msr, config);
+       }
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       u64 config;
+
+       if (msr) {
+               rdmsrl(msr, config);
+               config |= (1ULL << uncore_num_counters(box)) - 1;
+               /* WBox has a fixed counter */
+               if (uncore_msr_fixed_ctl(box))
+                       config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+               wrmsrl(msr, config);
+       }
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+       else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+       else
+               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT()                         \
+       .init_box       = nhmex_uncore_msr_init_box,            \
+       .exit_box       = nhmex_uncore_msr_exit_box,            \
+       .disable_box    = nhmex_uncore_msr_disable_box,         \
+       .enable_box     = nhmex_uncore_msr_enable_box,          \
+       .disable_event  = nhmex_uncore_msr_disable_event,       \
+       .read_counter   = uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event   = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_edge.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+       .name           = "format",
+       .attrs          = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 1,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .event_ctl      = NHMEX_U_MSR_PMON_EV_SEL,
+       .perf_ctr       = NHMEX_U_MSR_PMON_CTR,
+       .event_mask     = NHMEX_U_PMON_RAW_EVENT_MASK,
+       .box_ctl        = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+       .ops            = &nhmex_uncore_ops,
+       .format_group   = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+/* msr offset for each instance of cbox */
+static unsigned nhmex_cbox_msr_offsets[] = {
+       0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 6,
+       .num_boxes              = 10,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_C0_MSR_PMON_EV_SEL0,
+       .perf_ctr               = NHMEX_C0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+       .msr_offsets            = nhmex_cbox_msr_offsets,
+       .pair_ctr_ctl           = 1,
+       .ops                    = &nhmex_uncore_ops,
+       .format_group           = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+       .name                   = "wbox",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_W_MSR_PMON_CNT0,
+       .perf_ctr               = NHMEX_W_MSR_PMON_EVT_SEL0,
+       .fixed_ctr              = NHMEX_W_MSR_PMON_FIXED_CTR,
+       .fixed_ctl              = NHMEX_W_MSR_PMON_FIXED_CTL,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_W_MSR_GLOBAL_CTL,
+       .pair_ctr_ctl           = 1,
+       .event_descs            = nhmex_uncore_wbox_events,
+       .ops                    = &nhmex_uncore_ops,
+       .format_group           = &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int ctr, ev_sel;
+
+       ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+               NHMEX_B_PMON_CTR_SHIFT;
+       ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+                 NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+       /* events that do not use the match/mask registers */
+       if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+           (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+               return 0;
+
+       if (box->pmu->pmu_idx == 0)
+               reg1->reg = NHMEX_B0_MSR_MATCH;
+       else
+               reg1->reg = NHMEX_B1_MSR_MATCH;
+       reg1->idx = 0;
+       reg1->config = event->attr.config1;
+       reg2->config = event->attr.config2;
+       return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg1->reg, reg1->config);
+               wrmsrl(reg1->reg + 1, reg2->config);
+       }
+       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+               (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select the counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+       EVENT_CONSTRAINT(0, 1, 0xc0),
+       EVENT_CONSTRAINT(0x40, 2, 0xc0),
+       EVENT_CONSTRAINT(0x80, 4, 0xc0),
+       EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+       EVENT_CONSTRAINT_END,
+};
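+
+/*
+ * For example, an event programmed with counter=1 (config bits 6-7 == 0x40)
+ * is constrained to counter 1 (counter mask 0x2) by the table above.
+ */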
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+       &format_attr_event5.attr,
+       &format_attr_counter.attr,
+       &format_attr_match.attr,
+       &format_attr_mask.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_bbox_msr_enable_event,
+       .hw_config              = nhmex_bbox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+       .name                   = "bbox",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_B0_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_B0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_B_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+       .msr_offset             = NHMEX_B_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 1,
+       .constraints            = nhmex_uncore_bbox_constraints,
+       .ops                    = &nhmex_uncore_bbox_ops,
+       .format_group           = &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       /* only the TO_R_PROG_EV event uses the match/mask register */
+       if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
+           NHMEX_S_EVENT_TO_R_PROG_EV)
+               return 0;
+
+       if (box->pmu->pmu_idx == 0)
+               reg1->reg = NHMEX_S0_MSR_MM_CFG;
+       else
+               reg1->reg = NHMEX_S1_MSR_MM_CFG;
+       reg1->idx = 0;
+       reg1->config = event->attr.config1;
+       reg2->config = event->attr.config2;
+       return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg1->reg, 0);
+               wrmsrl(reg1->reg + 1, reg1->config);
+               wrmsrl(reg1->reg + 2, reg2->config);
+               wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+       }
+       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match.attr,
+       &format_attr_mask.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+       .name                   = "format",
+       .attrs                  = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_sbox_msr_enable_event,
+       .hw_config              = nhmex_sbox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_S0_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_S0_MSR_PMON_CTR0,
+       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+       .msr_offset             = NHMEX_S_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 1,
+       .ops                    = &nhmex_uncore_sbox_ops,
+       .format_group           = &nhmex_uncore_sbox_format_group
+};
+
+enum {
+       EXTRA_REG_NHMEX_M_FILTER,
+       EXTRA_REG_NHMEX_M_DSP,
+       EXTRA_REG_NHMEX_M_ISS,
+       EXTRA_REG_NHMEX_M_MAP,
+       EXTRA_REG_NHMEX_M_MSC_THR,
+       EXTRA_REG_NHMEX_M_PGT,
+       EXTRA_REG_NHMEX_M_PLD,
+       EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+       MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+       MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+       MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+       MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+       /* event 0xa uses two extra registers */
+       MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+       MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+       MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+       /* events 0xd ~ 0x10 use the same extra register */
+       MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+       MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+       MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+       EVENT_EXTRA_END
+};
+
+/* Nehalem-EX or Westmere-EX? */
+static bool uncore_nhmex;
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       bool ret = false;
+       u64 mask;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               er = &box->shared_regs[idx];
+               raw_spin_lock_irqsave(&er->lock, flags);
+               if (!atomic_read(&er->ref) || er->config == config) {
+                       atomic_inc(&er->ref);
+                       er->config = config;
+                       ret = true;
+               }
+               raw_spin_unlock_irqrestore(&er->lock, flags);
+
+               return ret;
+       }
+       /*
+        * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+        * events 0xd ~ 0x10. Besides these 4 per-event fields, the
+        * remaining fields are shared.
+        */
+       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       if (WARN_ON_ONCE(idx >= 4))
+               return false;
+
+       /* mask of the shared fields */
+       if (uncore_nhmex)
+               mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+       else
+               mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+       raw_spin_lock_irqsave(&er->lock, flags);
+       /* add mask of the non-shared field if it's in use */
+       if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
+               if (uncore_nhmex)
+                       mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               else
+                       mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       }
+
+       if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+               atomic_add(1 << (idx * 8), &er->ref);
+               if (uncore_nhmex)
+                       mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+                               NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               else
+                       mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
+                               WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+               er->config &= ~mask;
+               er->config |= (config & mask);
+               ret = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               er = &box->shared_regs[idx];
+               atomic_dec(&er->ref);
+               return;
+       }
+
+       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+       atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+       u64 config = reg1->config;
+
+       /* get the non-shared control bits and shift them */
+       idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+       if (uncore_nhmex)
+               config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       else
+               config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+       if (new_idx > orig_idx) {
+               idx = new_idx - orig_idx;
+               config <<= 3 * idx;
+       } else {
+               idx = orig_idx - new_idx;
+               config >>= 3 * idx;
+       }
+
+       /* add the shared control bits back */
+       if (uncore_nhmex)
+               config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       else
+               config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+       if (modify) {
+               /* adjust the main event selector */
+               if (new_idx > orig_idx)
+                       hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+               else
+                       hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+               reg1->config = config;
+               reg1->idx = ~0xff | new_idx;
+       }
+       return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       int i, idx[2], alloc = 0;
+       u64 config1 = reg1->config;
+
+       idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+       idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+       for (i = 0; i < 2; i++) {
+               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+                       idx[i] = 0xff;
+
+               if (idx[i] == 0xff)
+                       continue;
+
+               if (!nhmex_mbox_get_shared_reg(box, idx[i],
+                               __BITS_VALUE(config1, i, 32)))
+                       goto fail;
+               alloc |= (0x1 << i);
+       }
+
+       /* for the match/mask registers */
+       if (reg2->idx != EXTRA_REG_NONE &&
+           (uncore_box_is_fake(box) || !reg2->alloc) &&
+           !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+               goto fail;
+
+       /*
+        * If it's a fake box -- as per validate_{group,event}() -- we
+        * shouldn't touch the event state. We can avoid doing so because
+        * both only call get_event_constraints() once per event, which
+        * also avoids the need for reg->alloc.
+        */
+       if (!uncore_box_is_fake(box)) {
+               if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+                       nhmex_mbox_alter_er(event, idx[0], true);
+               reg1->alloc |= alloc;
+               if (reg2->idx != EXTRA_REG_NONE)
+                       reg2->alloc = 1;
+       }
+       return NULL;
+fail:
+       if (idx[0] != 0xff && !(alloc & 0x1) &&
+           idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+               /*
+                * events 0xd ~ 0x10 are functionally identical, but are
+                * controlled by different fields in the ZDP_CTL_FVC
+                * register. If we failed to take one field, try the
+                * remaining 3 choices.
+                */
+               BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+               idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+               idx[0] = (idx[0] + 1) % 4;
+               idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+               if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+                       config1 = nhmex_mbox_alter_er(event, idx[0], false);
+                       goto again;
+               }
+       }
+
+       if (alloc & 0x1)
+               nhmex_mbox_put_shared_reg(box, idx[0]);
+       if (alloc & 0x2)
+               nhmex_mbox_put_shared_reg(box, idx[1]);
+       return &uncore_constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+       if (uncore_box_is_fake(box))
+               return;
+
+       if (reg1->alloc & 0x1)
+               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+       if (reg1->alloc & 0x2)
+               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+       reg1->alloc = 0;
+
+       if (reg2->alloc) {
+               nhmex_mbox_put_shared_reg(box, reg2->idx);
+               reg2->alloc = 0;
+       }
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+       if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+               return er->idx;
+       return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_type *type = box->pmu->type;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       struct extra_reg *er;
+       unsigned msr;
+       int reg_idx = 0;
+       /*
+        * The mbox events may require at most 2 extra MSRs. But only
+        * the lower 32 bits in these MSRs are significant, so we can use
+        * config1 to pass both MSRs' configs.
+        */
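+       /*
+        * For example, a PLD event packs the PLD config into the upper 32
+        * bits of config1 and the other extra register's config into the
+        * lower 32 bits (see the reg_idx handling below).
+        */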
+       for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               if (event->attr.config1 & ~er->valid_mask)
+                       return -EINVAL;
+
+               msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+               if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+                       return -EINVAL;
+
+               /* always use the 32~63 bits to pass the PLD config */
+               if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+                       reg_idx = 1;
+               else if (WARN_ON_ONCE(reg_idx > 0))
+                       return -EINVAL;
+
+               reg1->idx &= ~(0xff << (reg_idx * 8));
+               reg1->reg &= ~(0xffff << (reg_idx * 16));
+               reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+               reg1->reg |= msr << (reg_idx * 16);
+               reg1->config = event->attr.config1;
+               reg_idx++;
+       }
+       /*
+        * The mbox only provides the ability to perform address matching
+        * for the PLD events.
+        */
+       if (reg_idx == 2) {
+               reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+               if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+                       reg2->config = event->attr.config2;
+               else
+                       reg2->config = ~0ULL;
+               if (box->pmu->pmu_idx == 0)
+                       reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+               else
+                       reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+       }
+       return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       u64 config;
+
+       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+               return box->shared_regs[idx].config;
+
+       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       config = er->config;
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+       return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int idx;
+
+       idx = __BITS_VALUE(reg1->idx, 0, 8);
+       if (idx != 0xff)
+               wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+                       nhmex_mbox_shared_reg_config(box, idx));
+       idx = __BITS_VALUE(reg1->idx, 1, 8);
+       if (idx != 0xff)
+               wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+                       nhmex_mbox_shared_reg_config(box, idx));
+
+       if (reg2->idx != EXTRA_REG_NONE) {
+               wrmsrl(reg2->reg, 0);
+               if (reg2->config != ~0ULL) {
+                       wrmsrl(reg2->reg + 1,
+                               reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+                       wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+                               (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+                       wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+               }
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode,          count_mode,     "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode,                storage_mode,   "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode,           wrap_mode,      "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode,           flag_mode,      "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel,             inc_sel,        "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel,                set_flag_sel,   "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en,       filter_cfg_en,  "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match,                filter_match,   "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask,         filter_mask,    "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp,                 dsp,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr,                 thr,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc,                 fvc,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt,                 pgt,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map,                 map,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss,                 iss,            "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld,                 pld,            "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+       &format_attr_count_mode.attr,
+       &format_attr_storage_mode.attr,
+       &format_attr_wrap_mode.attr,
+       &format_attr_flag_mode.attr,
+       &format_attr_inc_sel.attr,
+       &format_attr_set_flag_sel.attr,
+       &format_attr_filter_cfg_en.attr,
+       &format_attr_filter_match.attr,
+       &format_attr_filter_mask.attr,
+       &format_attr_dsp.attr,
+       &format_attr_thr.attr,
+       &format_attr_fvc.attr,
+       &format_attr_pgt.attr,
+       &format_attr_map.attr,
+       &format_attr_iss.attr,
+       &format_attr_pld.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+       .name           = "format",
+       .attrs          = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+       { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
+       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event   = nhmex_mbox_msr_enable_event,
+       .hw_config      = nhmex_mbox_hw_config,
+       .get_constraint = nhmex_mbox_get_constraint,
+       .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+       .name                   = "mbox",
+       .num_counters           = 6,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_M0_MSR_PMU_CTL0,
+       .perf_ctr               = NHMEX_M0_MSR_PMU_CNT0,
+       .event_mask             = NHMEX_M_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_M0_MSR_GLOBAL_CTL,
+       .msr_offset             = NHMEX_M_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 8,
+       .event_descs            = nhmex_uncore_mbox_events,
+       .ops                    = &nhmex_uncore_mbox_ops,
+       .format_group           = &nhmex_uncore_mbox_format_group,
+};
+
+static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       /* adjust the main event selector and extra register index */
+       if (reg1->idx % 2) {
+               reg1->idx--;
+               hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       } else {
+               reg1->idx++;
+               hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       }
+
+       /* adjust extra register config */
+       switch (reg1->idx % 6) {
+       case 2:
+               /* shift the 8~15 bits to the 0~7 bits */
+               reg1->config >>= 8;
+               break;
+       case 3:
+               /* shift the 0~7 bits to the 8~15 bits */
+               reg1->config <<= 8;
+               break;
+       }
+}
+
+/*
+ * Each rbox has 4 event sets which monitor PQI ports 0~3 or 4~7.
+ * An event set consists of 6 events; the 3rd and 4th events in
+ * an event set use the same extra register, so an event set uses
+ * 5 extra registers.
+ */
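+/*
+ * For example, for reg1->idx == 8 (the 3rd event of the 2nd set) the extra
+ * register index is (8 / 6) * 5 + 2 == 7; index 9 (the 4th event of that
+ * set) maps to the same register.
+ */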
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       struct intel_uncore_extra_reg *er;
+       unsigned long flags;
+       int idx, er_idx;
+       u64 config1;
+       bool ok = false;
+
+       if (!uncore_box_is_fake(box) && reg1->alloc)
+               return NULL;
+
+       idx = reg1->idx % 6;
+       config1 = reg1->config;
+again:
+       er_idx = idx;
+       /* the 3rd and 4th events use the same extra register */
+       if (er_idx > 2)
+               er_idx--;
+       er_idx += (reg1->idx / 6) * 5;
+
+       er = &box->shared_regs[er_idx];
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (idx < 2) {
+               if (!atomic_read(&er->ref) || er->config == reg1->config) {
+                       atomic_inc(&er->ref);
+                       er->config = reg1->config;
+                       ok = true;
+               }
+       } else if (idx == 2 || idx == 3) {
+               /*
+                * these two events use different fields in an extra register,
+                * the 0~7 bits and the 8~15 bits respectively.
+                */
+               u64 mask = 0xff << ((idx - 2) * 8);
+               if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+                               !((er->config ^ config1) & mask)) {
+                       atomic_add(1 << ((idx - 2) * 8), &er->ref);
+                       er->config &= ~mask;
+                       er->config |= config1 & mask;
+                       ok = true;
+               }
+       } else {
+               if (!atomic_read(&er->ref) ||
+                               (er->config == (hwc->config >> 32) &&
+                                er->config1 == reg1->config &&
+                                er->config2 == reg2->config)) {
+                       atomic_inc(&er->ref);
+                       er->config = (hwc->config >> 32);
+                       er->config1 = reg1->config;
+                       er->config2 = reg2->config;
+                       ok = true;
+               }
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (!ok) {
+               /*
+                * The Rbox events are always in pairs. The paired
+                * events are functionally identical, but use different
+                * extra registers. If we failed to take an extra
+                * register, try the alternative.
+                */
+               idx ^= 1;
+               if (idx != reg1->idx % 6) {
+                       if (idx == 2)
+                               config1 >>= 8;
+                       else if (idx == 3)
+                               config1 <<= 8;
+                       goto again;
+               }
+       } else {
+               if (!uncore_box_is_fake(box)) {
+                       if (idx != reg1->idx % 6)
+                               nhmex_rbox_alter_er(box, event);
+                       reg1->alloc = 1;
+               }
+               return NULL;
+       }
+       return &uncore_constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct intel_uncore_extra_reg *er;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       int idx, er_idx;
+
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       idx = reg1->idx % 6;
+       er_idx = idx;
+       if (er_idx > 2)
+               er_idx--;
+       er_idx += (reg1->idx / 6) * 5;
+
+       er = &box->shared_regs[er_idx];
+       if (idx == 2 || idx == 3)
+               atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+       else
+               atomic_dec(&er->ref);
+
+       reg1->alloc = 0;
+}
+
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+       int idx;
+
+       idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+               NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+       if (idx >= 0x18)
+               return -EINVAL;
+
+       reg1->idx = idx;
+       reg1->config = event->attr.config1;
+
+       switch (idx % 6) {
+       case 4:
+       case 5:
+               hwc->config |= event->attr.config & (~0ULL << 32);
+               reg2->config = event->attr.config2;
+               break;
+       }
+       return 0;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+       int idx, port;
+
+       idx = reg1->idx;
+       port = idx / 6 + box->pmu->pmu_idx * 4;
+
+       switch (idx % 6) {
+       case 0:
+               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
+               break;
+       case 1:
+               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
+               break;
+       case 2:
+       case 3:
+               wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
+                       uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
+               break;
+       case 4:
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
+                       hwc->config >> 32);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
+               break;
+       case 5:
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
+                       hwc->config >> 32);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
+               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
+               break;
+       }
+
+       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+               (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+       &format_attr_event5.attr,
+       &format_attr_xbr_mm_cfg.attr,
+       &format_attr_xbr_match.attr,
+       &format_attr_xbr_mask.attr,
+       &format_attr_qlx_cfg.attr,
+       &format_attr_iperf_cfg.attr,
+       NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+       .name = "format",
+       .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+       INTEL_UNCORE_EVENT_DESC(qpi0_flit_send,         "event=0x0,iperf_cfg=0x80000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_filt_send,         "event=0x6,iperf_cfg=0x80000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt,         "event=0x0,iperf_cfg=0x40000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt,         "event=0x6,iperf_cfg=0x40000000"),
+       INTEL_UNCORE_EVENT_DESC(qpi0_date_response,     "event=0x0,iperf_cfg=0xc4"),
+       INTEL_UNCORE_EVENT_DESC(qpi1_date_response,     "event=0x6,iperf_cfg=0xc4"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+       NHMEX_UNCORE_OPS_COMMON_INIT(),
+       .enable_event           = nhmex_rbox_msr_enable_event,
+       .hw_config              = nhmex_rbox_hw_config,
+       .get_constraint         = nhmex_rbox_get_constraint,
+       .put_constraint         = nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+       .name                   = "rbox",
+       .num_counters           = 8,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = NHMEX_R_MSR_PMON_CTL0,
+       .perf_ctr               = NHMEX_R_MSR_PMON_CNT0,
+       .event_mask             = NHMEX_R_PMON_RAW_EVENT_MASK,
+       .box_ctl                = NHMEX_R_MSR_GLOBAL_CTL,
+       .msr_offset             = NHMEX_R_MSR_OFFSET,
+       .pair_ctr_ctl           = 1,
+       .num_shared_regs        = 20,
+       .event_descs            = nhmex_uncore_rbox_events,
+       .ops                    = &nhmex_uncore_rbox_ops,
+       .format_group           = &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+       &nhmex_uncore_ubox,
+       &nhmex_uncore_cbox,
+       &nhmex_uncore_bbox,
+       &nhmex_uncore_sbox,
+       &nhmex_uncore_mbox,
+       &nhmex_uncore_rbox,
+       &nhmex_uncore_wbox,
+       NULL,
+};
+
+void nhmex_uncore_cpu_init(void)
+{
+       if (boot_cpu_data.x86_model == 46)
+               uncore_nhmex = true;
+       else
+               nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
+       if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = nhmex_msr_uncores;
+}
+/* end of Nehalem-EX uncore support */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
new file mode 100644 (file)
index 0000000..96531d2
--- /dev/null
@@ -0,0 +1,731 @@
+/* Nehalem/SandyBridge/Haswell uncore support */
+#include "uncore.h"
+
+/* Uncore IMC PCI IDs */
+#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
+#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
+#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
+#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
+#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
+#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET                   (1 << 18)
+#define SNB_UNC_CTL_EN                         (1 << 22)
+#define SNB_UNC_CTL_INVERT                     (1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK                 0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK                 0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN               (1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
+                                                SNB_UNC_CTL_UMASK_MASK | \
+                                                SNB_UNC_CTL_EDGE_DET | \
+                                                SNB_UNC_CTL_INVERT | \
+                                                SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
+                                                SNB_UNC_CTL_UMASK_MASK | \
+                                                SNB_UNC_CTL_EDGE_DET | \
+                                                SNB_UNC_CTL_INVERT | \
+                                                NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
+#define SNB_UNC_FIXED_CTR_CTRL                  0x394
+#define SNB_UNC_FIXED_CTR                       0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
+#define SNB_UNC_CBO_0_PER_CTR0                  0x706
+#define SNB_UNC_CBO_MSR_OFFSET                  0x10
+
+/* SNB ARB register */
+#define SNB_UNC_ARB_PER_CTR0                   0x3b0
+#define SNB_UNC_ARB_PERFEVTSEL0                        0x3b2
+#define SNB_UNC_ARB_MSR_OFFSET                 0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
+#define NHM_UNC_FIXED_CTR                       0x394
+#define NHM_UNC_FIXED_CTR_CTRL                  0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0                     0x3c0
+#define NHM_UNC_UNCORE_PMC0                     0x3b0
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+       else
+               wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       wrmsrl(event->hw.config_base, 0);
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
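+       /*
+        * The global control MSR is shared by all boxes, so only the first
+        * PMU instance needs to program it.
+        */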
+       if (box->pmu->pmu_idx == 0) {
+               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+                       SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+       }
+}
+
+static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+       if (box->pmu->pmu_idx == 0)
+               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct uncore_event_desc snb_uncore_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+       { /* end: all zeroes */ },
+};
+
+static struct attribute *snb_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask5.attr,
+       NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+       .name           = "format",
+       .attrs          = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+       .init_box       = snb_uncore_msr_init_box,
+       .exit_box       = snb_uncore_msr_exit_box,
+       .disable_event  = snb_uncore_msr_disable_event,
+       .enable_event   = snb_uncore_msr_enable_event,
+       .read_counter   = uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_arb_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+       .name           = "cbox",
+       .num_counters   = 2,
+       .num_boxes      = 4,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
+       .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
+       .fixed_ctr      = SNB_UNC_FIXED_CTR,
+       .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
+       .single_fixed   = 1,
+       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
+       .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
+       .ops            = &snb_uncore_msr_ops,
+       .format_group   = &snb_uncore_format_group,
+       .event_descs    = snb_uncore_events,
+};
+
+static struct intel_uncore_type snb_uncore_arb = {
+       .name           = "arb",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .perf_ctr       = SNB_UNC_ARB_PER_CTR0,
+       .event_ctl      = SNB_UNC_ARB_PERFEVTSEL0,
+       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
+       .msr_offset     = SNB_UNC_ARB_MSR_OFFSET,
+       .constraints    = snb_uncore_arb_constraints,
+       .ops            = &snb_uncore_msr_ops,
+       .format_group   = &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+       &snb_uncore_cbox,
+       &snb_uncore_arb,
+       NULL,
+};
+
+void snb_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = snb_msr_uncores;
+       if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+}
+
+enum {
+       SNB_PCI_UNCORE_IMC,
+};
+
+static struct uncore_event_desc snb_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(data_reads,  "event=0x01"),
+       INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
+
+       INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
+       INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
+
+       { /* end: all zeroes */ },
+};
+
+#define SNB_UNCORE_PCI_IMC_EVENT_MASK          0xff
+#define SNB_UNCORE_PCI_IMC_BAR_OFFSET          0x48
+
+/* page size multiple covering all config regs */
+#define SNB_UNCORE_PCI_IMC_MAP_SIZE            0x6000
+
+#define SNB_UNCORE_PCI_IMC_DATA_READS          0x1
+#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE     0x5050
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES         0x2
+#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE    0x5054
+#define SNB_UNCORE_PCI_IMC_CTR_BASE            SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+
+static struct attribute *snb_uncore_imc_formats_attr[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+
+static struct attribute_group snb_uncore_imc_format_group = {
+       .name = "format",
+       .attrs = snb_uncore_imc_formats_attr,
+};
+
+static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
+       resource_size_t addr;
+       u32 pci_dword;
+
+       pci_read_config_dword(pdev, where, &pci_dword);
+       addr = pci_dword;
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+       pci_read_config_dword(pdev, where + 4, &pci_dword);
+       addr |= ((resource_size_t)pci_dword << 32);
+#endif
+
+       addr &= ~(PAGE_SIZE - 1);
+
+       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+       box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
+}
+
+static void snb_uncore_imc_exit_box(struct intel_uncore_box *box)
+{
+       iounmap(box->io_addr);
+}
+
+static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
+{}
+
+static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{}
+
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
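+       /* the free-running IMC counters are memory mapped; read them directly */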
+       return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
+}
+
+/*
+ * Custom event_init() function because we define our own fixed, free-running
+ * counters and do not want to conflict with the generic uncore logic. This
+ * also simplifies processing.
+ */
+static int snb_uncore_imc_event_init(struct perf_event *event)
+{
+       struct intel_uncore_pmu *pmu;
+       struct intel_uncore_box *box;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
+       int idx, base;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       pmu = uncore_event_to_pmu(event);
+       /* no device found for this pmu */
+       if (pmu->func_id < 0)
+               return -ENOENT;
+
+       /* Sampling not supported yet */
+       if (hwc->sample_period)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       /*
+        * Place all uncore events for a particular physical package
+        * onto a single cpu
+        */
+       if (event->cpu < 0)
+               return -EINVAL;
+
+       /* check only supported bits are set */
+       if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
+               return -EINVAL;
+
+       box = uncore_pmu_to_box(pmu, event->cpu);
+       if (!box || box->cpu < 0)
+               return -EINVAL;
+
+       event->cpu = box->cpu;
+       event->pmu_private = box;
+
+       event->hw.idx = -1;
+       event->hw.last_tag = ~0ULL;
+       .enable_event   = snbep_uncore_pci_enable_event,
+       event->hw.branch_reg.idx = EXTRA_REG_NONE;
+       /*
+        * check event is known (whitelist, determines counter)
+        */
+       switch (cfg) {
+       case SNB_UNCORE_PCI_IMC_DATA_READS:
+               base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
+               idx = UNCORE_PMC_IDX_FIXED;
+               break;
+       case SNB_UNCORE_PCI_IMC_DATA_WRITES:
+               base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
+               idx = UNCORE_PMC_IDX_FIXED + 1;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* must be done before validate_group */
+       event->hw.event_base = base;
+       event->hw.config = cfg;
+       event->hw.idx = idx;
+
+       /* no group validation needed, we have free running counters */
+
+       return 0;
+}
+
+static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return 0;
+}
+
+static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       u64 count;
+
+       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+               return;
+
+       event->hw.state = 0;
+       box->n_active++;
+
+       list_add_tail(&event->active_entry, &box->active_list);
+
+       count = snb_uncore_imc_read_counter(box, event);
+       local64_set(&event->hw.prev_count, count);
+
+       if (box->n_active == 1)
+               uncore_pmu_start_hrtimer(box);
+}
+
+static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               box->n_active--;
+
+               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+               hwc->state |= PERF_HES_STOPPED;
+
+               list_del(&event->active_entry);
+
+               if (box->n_active == 0)
+                       uncore_pmu_cancel_hrtimer(box);
+       }
+
+       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+               /*
+                * Drain the remaining delta count out of an event
+                * that we are disabling:
+                */
+               uncore_perf_event_update(box, event);
+               hwc->state |= PERF_HES_UPTODATE;
+       }
+}
+
+static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (!box)
+               return -ENODEV;
+
+       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+       if (!(flags & PERF_EF_START))
+               hwc->state |= PERF_HES_ARCH;
+
+       snb_uncore_imc_event_start(event, 0);
+
+       box->n_events++;
+
+       return 0;
+}
+
+static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
+{
+       struct intel_uncore_box *box = uncore_event_to_box(event);
+       int i;
+
+       snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
+
+       for (i = 0; i < box->n_events; i++) {
+               if (event == box->event_list[i]) {
+                       --box->n_events;
+                       break;
+               }
+       }
+}
+
+int snb_pci2phy_map_init(int devid)
+{
+       struct pci_dev *dev = NULL;
+       struct pci2phy_map *map;
+       int bus, segment;
+
+       dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
+       if (!dev)
+               return -ENOTTY;
+
+       bus = dev->bus->number;
+       segment = pci_domain_nr(dev->bus);
+
+       raw_spin_lock(&pci2phy_map_lock);
+       map = __find_pci2phy_map(segment);
+       if (!map) {
+               raw_spin_unlock(&pci2phy_map_lock);
+               pci_dev_put(dev);
+               return -ENOMEM;
+       }
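+       /* client parts have a single package, so map this bus to physid 0 */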
+       map->pbus_to_physid[bus] = 0;
+       raw_spin_unlock(&pci2phy_map_lock);
+
+       pci_dev_put(dev);
+
+       return 0;
+}
+
+static struct pmu snb_uncore_imc_pmu = {
+       .task_ctx_nr    = perf_invalid_context,
+       .event_init     = snb_uncore_imc_event_init,
+       .add            = snb_uncore_imc_event_add,
+       .del            = snb_uncore_imc_event_del,
+       .start          = snb_uncore_imc_event_start,
+       .stop           = snb_uncore_imc_event_stop,
+       .read           = uncore_pmu_event_read,
+};
+
+static struct intel_uncore_ops snb_uncore_imc_ops = {
+       .init_box       = snb_uncore_imc_init_box,
+       .exit_box       = snb_uncore_imc_exit_box,
+       .enable_box     = snb_uncore_imc_enable_box,
+       .disable_box    = snb_uncore_imc_disable_box,
+       .disable_event  = snb_uncore_imc_disable_event,
+       .enable_event   = snb_uncore_imc_enable_event,
+       .hw_config      = snb_uncore_imc_hw_config,
+       .read_counter   = snb_uncore_imc_read_counter,
+};
+
+static struct intel_uncore_type snb_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .fixed_ctr_bits = 32,
+       .fixed_ctr      = SNB_UNCORE_PCI_IMC_CTR_BASE,
+       .event_descs    = snb_uncore_imc_events,
+       .format_group   = &snb_uncore_imc_format_group,
+       .perf_ctr       = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
+       .event_mask     = SNB_UNCORE_PCI_IMC_EVENT_MASK,
+       .ops            = &snb_uncore_imc_ops,
+       .pmu            = &snb_uncore_imc_pmu,
+};
+
+static struct intel_uncore_type *snb_pci_uncores[] = {
+       [SNB_PCI_UNCORE_IMC]    = &snb_uncore_imc,
+       NULL,
+};
+
+static const struct pci_device_id snb_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id ivb_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id hsw_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static const struct pci_device_id skl_uncore_pci_ids[] = {
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* end: all zeroes */ },
+};
+
+static struct pci_driver snb_uncore_pci_driver = {
+       .name           = "snb_uncore",
+       .id_table       = snb_uncore_pci_ids,
+};
+
+static struct pci_driver ivb_uncore_pci_driver = {
+       .name           = "ivb_uncore",
+       .id_table       = ivb_uncore_pci_ids,
+};
+
+static struct pci_driver hsw_uncore_pci_driver = {
+       .name           = "hsw_uncore",
+       .id_table       = hsw_uncore_pci_ids,
+};
+
+static struct pci_driver bdw_uncore_pci_driver = {
+       .name           = "bdw_uncore",
+       .id_table       = bdw_uncore_pci_ids,
+};
+
+static struct pci_driver skl_uncore_pci_driver = {
+       .name           = "skl_uncore",
+       .id_table       = skl_uncore_pci_ids,
+};
+
+struct imc_uncore_pci_dev {
+       __u32 pci_id;
+       struct pci_driver *driver;
+};
+#define IMC_DEV(a, d) \
+       { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
+
+static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
+       IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
+       IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
+       IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
+       IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
+       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
+       IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
+       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
+       {  /* end marker */ }
+};
+
+
+#define for_each_imc_pci_id(x, t) \
+       for (x = (t); (x)->pci_id; x++)
+
+static struct pci_driver *imc_uncore_find_dev(void)
+{
+       const struct imc_uncore_pci_dev *p;
+       int ret;
+
+       for_each_imc_pci_id(p, desktop_imc_pci_ids) {
+               ret = snb_pci2phy_map_init(p->pci_id);
+               if (ret == 0)
+                       return p->driver;
+       }
+       return NULL;
+}
+
+static int imc_uncore_pci_init(void)
+{
+       struct pci_driver *imc_drv = imc_uncore_find_dev();
+
+       if (!imc_drv)
+               return -ENODEV;
+
+       uncore_pci_uncores = snb_pci_uncores;
+       uncore_pci_driver = imc_drv;
+
+       return 0;
+}
+
+int snb_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int ivb_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+int hsw_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int bdw_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+int skl_uncore_pci_init(void)
+{
+       return imc_uncore_pci_init();
+}
+
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+       else
+               wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_cmask8.attr,
+       NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+       .name = "format",
+       .attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
+       INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
+       INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+       .disable_box    = nhm_uncore_msr_disable_box,
+       .enable_box     = nhm_uncore_msr_enable_box,
+       .disable_event  = snb_uncore_msr_disable_event,
+       .enable_event   = nhm_uncore_msr_enable_event,
+       .read_counter   = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+       .name           = "",
+       .num_counters   = 8,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .event_ctl      = NHM_UNC_PERFEVTSEL0,
+       .perf_ctr       = NHM_UNC_UNCORE_PMC0,
+       .fixed_ctr      = NHM_UNC_FIXED_CTR,
+       .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
+       .event_mask     = NHM_UNC_RAW_EVENT_MASK,
+       .event_descs    = nhm_uncore_events,
+       .ops            = &nhm_uncore_msr_ops,
+       .format_group   = &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+       &nhm_uncore,
+       NULL,
+};
+
+void nhm_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = nhm_msr_uncores;
+}
+
+/* end of Nehalem uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
new file mode 100644 (file)
index 0000000..93f6bd9
--- /dev/null
@@ -0,0 +1,3135 @@
+/* SandyBridge-EP/IvyTown uncore support */
+#include "uncore.h"
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL    (1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS    (1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ         (1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN      (1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+                                        SNBEP_PMON_BOX_CTL_RST_CTRS | \
+                                        SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK     0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK      0x0000ff00
+#define SNBEP_PMON_CTL_RST             (1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET                (1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT      (1 << 21)
+#define SNBEP_PMON_CTL_EN              (1 << 22)
+#define SNBEP_PMON_CTL_INVERT          (1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK      0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK      (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                        SNBEP_PMON_CTL_UMASK_MASK | \
+                                        SNBEP_PMON_CTL_EDGE_DET | \
+                                        SNBEP_PMON_CTL_INVERT | \
+                                        SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK                0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK                \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_UMASK_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+#define SNBEP_CBO_PMON_CTL_TID_EN              (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK      (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK    0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK      0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT      (1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET    (1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_RAW_EVENT_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL                 0xf4
+#define SNBEP_PCI_PMON_CTL0                    0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0                    0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0       0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1       0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH      0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL                0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR                0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0         0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1         0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0          0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1          0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0                  0xc16
+#define SNBEP_U_MSR_PMON_CTL0                  0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL                0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR                0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0                 0xd16
+#define SNBEP_C0_MSR_PMON_CTL0                 0xd10
+#define SNBEP_C0_MSR_PMON_BOX_CTL              0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER           0xd14
+#define SNBEP_CBO_MSR_OFFSET                   0x20
+
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID      0x1f
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID      0x3fc00
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE    0x7c0000
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC      0xff800000
+
+#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) {   \
+       .event = (e),                           \
+       .msr = SNBEP_C0_MSR_PMON_BOX_FILTER,    \
+       .config_mask = (m),                     \
+       .idx = (i)                              \
+}
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0                        0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0                        0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_CTL             0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER          0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK     0xffffffff
+#define SNBEP_PCU_MSR_CORE_C3_CTR              0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR              0x3fd
+
+/* IVBEP event control */
+#define IVBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+                                        SNBEP_PMON_BOX_CTL_RST_CTRS)
+#define IVBEP_PMON_RAW_EVENT_MASK              (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                        SNBEP_PMON_CTL_UMASK_MASK | \
+                                        SNBEP_PMON_CTL_EDGE_DET | \
+                                        SNBEP_PMON_CTL_TRESH_MASK)
+/* IVBEP Ubox */
+#define IVBEP_U_MSR_PMON_GLOBAL_CTL            0xc00
+#define IVBEP_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
+#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL          (1 << 29)
+
+#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK        \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_UMASK_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+/* IVBEP Cbo */
+#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK              (IVBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x1fULL << 0)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 5)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x3fULL << 17)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
+#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
+
+/* IVBEP home agent */
+#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST                (1 << 16)
+#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK               \
+                               (IVBEP_PMON_RAW_EVENT_MASK | \
+                                IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
+/* IVBEP PCU */
+#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
+                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+/* IVBEP QPI */
+#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
+                               (IVBEP_PMON_RAW_EVENT_MASK | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT)
+
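+/* extract the i-th n-bit wide field from x */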
+#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
+                               ((1ULL << (n)) - 1)))
+
+/* Haswell-EP Ubox */
+#define HSWEP_U_MSR_PMON_CTR0                  0x709
+#define HSWEP_U_MSR_PMON_CTL0                  0x705
+#define HSWEP_U_MSR_PMON_FILTER                        0x707
+
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL                0x703
+#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR                0x704
+
+#define HSWEP_U_MSR_PMON_BOX_FILTER_TID                (0x1 << 0)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_CID                (0x1fULL << 1)
+#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
+                                       (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
+                                        HSWEP_U_MSR_PMON_BOX_FILTER_CID)
+
+/* Haswell-EP CBo */
+#define HSWEP_C0_MSR_PMON_CTR0                 0xe08
+#define HSWEP_C0_MSR_PMON_CTL0                 0xe01
+#define HSWEP_C0_MSR_PMON_BOX_CTL                      0xe00
+#define HSWEP_C0_MSR_PMON_BOX_FILTER0          0xe05
+#define HSWEP_CBO_MSR_OFFSET                   0x10
+
+
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x3fULL << 0)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 6)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x7fULL << 17)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
+#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
+
+
+/* Haswell-EP Sbox */
+#define HSWEP_S0_MSR_PMON_CTR0                 0x726
+#define HSWEP_S0_MSR_PMON_CTL0                 0x721
+#define HSWEP_S0_MSR_PMON_BOX_CTL                      0x720
+#define HSWEP_SBOX_MSR_OFFSET                  0xa
+#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* Haswell-EP PCU */
+#define HSWEP_PCU_MSR_PMON_CTR0                        0x717
+#define HSWEP_PCU_MSR_PMON_CTL0                        0x711
+#define HSWEP_PCU_MSR_PMON_BOX_CTL             0x710
+#define HSWEP_PCU_MSR_PMON_BOX_FILTER          0x715
+
+/* KNL Ubox */
+#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
+                                       (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
+                                               SNBEP_CBO_PMON_CTL_TID_EN)
+/* KNL CHA */
+#define KNL_CHA_MSR_OFFSET                     0xc
+#define KNL_CHA_MSR_PMON_CTL_QOR               (1 << 16)
+#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
+                                       (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
+                                        KNL_CHA_MSR_PMON_CTL_QOR)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_TID                0x1ff
+#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE      (7 << 18)
+#define KNL_CHA_MSR_PMON_BOX_FILTER_OP         (0xfffffe2aULL << 32)
+
+/* KNL EDC/MC UCLK */
+#define KNL_UCLK_MSR_PMON_CTR0_LOW             0x400
+#define KNL_UCLK_MSR_PMON_CTL0                 0x420
+#define KNL_UCLK_MSR_PMON_BOX_CTL              0x430
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW       0x44c
+#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL       0x454
+#define KNL_PMON_FIXED_CTL_EN                  0x1
+
+/* KNL EDC */
+#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW                0xa00
+#define KNL_EDC0_ECLK_MSR_PMON_CTL0            0xa20
+#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL         0xa30
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW  0xa3c
+#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL  0xa44
+
+/* KNL MC */
+#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW          0xb00
+#define KNL_MC0_CH0_MSR_PMON_CTL0              0xb20
+#define KNL_MC0_CH0_MSR_PMON_BOX_CTL           0xb30
+#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW         0xb3c
+#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL         0xb44
+
+/* KNL IRP */
+#define KNL_IRP_PCI_PMON_BOX_CTL               0xf0
+#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
+                                                KNL_CHA_MSR_PMON_CTL_QOR)
+/* KNL PCU */
+#define KNL_PCU_PMON_CTL_EV_SEL_MASK           0x0000007f
+#define KNL_PCU_PMON_CTL_USE_OCC_CTR           (1 << 7)
+#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK                0x3f000000
+#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK        \
+                               (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
+                                KNL_PCU_PMON_CTL_USE_OCC_CTR | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+                                SNBEP_PMON_CTL_EDGE_DET | \
+                                SNBEP_CBO_PMON_CTL_TID_EN | \
+                                SNBEP_PMON_CTL_EV_SEL_EXT | \
+                                SNBEP_PMON_CTL_INVERT | \
+                                KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
+DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
+DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
+
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+       u32 config = 0;
+
+       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+               config |= SNBEP_PMON_BOX_CTL_FRZ;
+               pci_write_config_dword(pdev, box_ctl, config);
+       }
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+       u32 config = 0;
+
+       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+               pci_write_config_dword(pdev, box_ctl, config);
+       }
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
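+       /* the counter is exposed as two 32-bit PCI config dwords: low, then high */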
+       pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+       pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
+       return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+
+       pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+       u64 config;
+       unsigned msr;
+
+       msr = uncore_msr_box_ctl(box);
+       if (msr) {
+               rdmsrl(msr, config);
+               config |= SNBEP_PMON_BOX_CTL_FRZ;
+               wrmsrl(msr, config);
+       }
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       u64 config;
+       unsigned msr;
+
+       msr = uncore_msr_box_ctl(box);
+       if (msr) {
+               rdmsrl(msr, config);
+               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+               wrmsrl(msr, config);
+       }
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE)
+               wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+
+       if (msr)
+               wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid.attr,
+       &format_attr_filter_nid.attr,
+       &format_attr_filter_state.attr,
+       &format_attr_filter_opc.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge.attr,
+       &format_attr_filter_band0.attr,
+       &format_attr_filter_band1.attr,
+       &format_attr_filter_band2.attr,
+       &format_attr_filter_band3.attr,
+       NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match_rds.attr,
+       &format_attr_match_rnid30.attr,
+       &format_attr_match_rnid4.attr,
+       &format_attr_match_dnid.attr,
+       &format_attr_match_mc.attr,
+       &format_attr_match_opc.attr,
+       &format_attr_match_vnw.attr,
+       &format_attr_match0.attr,
+       &format_attr_match1.attr,
+       &format_attr_mask_rds.attr,
+       &format_attr_mask_rnid30.attr,
+       &format_attr_mask_rnid4.attr,
+       &format_attr_mask_dnid.attr,
+       &format_attr_mask_mc.attr,
+       &format_attr_mask_opc.attr,
+       &format_attr_mask_vnw.attr,
+       &format_attr_mask0.attr,
+       &format_attr_mask1.attr,
+       NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+       { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
+       INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+       INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x102,umask=0x08"),
+       INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x103,umask=0x04"),
+       { /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_qpi_format_group = {
+       .name = "format",
+       .attrs = snbep_uncore_qpi_formats_attr,
+};
+
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                   \
+       .disable_box    = snbep_uncore_msr_disable_box,         \
+       .enable_box     = snbep_uncore_msr_enable_box,          \
+       .disable_event  = snbep_uncore_msr_disable_event,       \
+       .enable_event   = snbep_uncore_msr_enable_event,        \
+       .read_counter   = uncore_msr_read_counter
+
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
+       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),                   \
+       .init_box       = snbep_uncore_msr_init_box             \
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT()                     \
+       .init_box       = snbep_uncore_pci_init_box,            \
+       .disable_box    = snbep_uncore_pci_disable_box,         \
+       .enable_box     = snbep_uncore_pci_enable_box,          \
+       .disable_event  = snbep_uncore_pci_disable_event,       \
+       .read_counter   = snbep_uncore_pci_read_counter
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+       .enable_event   = snbep_uncore_pci_enable_event,        \
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+       UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+       EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
+       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
+       .event_mask     = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops            = &snbep_uncore_msr_ops,
+       .format_group   = &snbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
+       EVENT_EXTRA_END
+};
+
+static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       int i;
+
+       if (uncore_box_is_fake(box))
+               return;
+
+       for (i = 0; i < 5; i++) {
+               if (reg1->alloc & (0x1 << i))
+                       atomic_sub(1 << (i * 6), &er->ref);
+       }
+       reg1->alloc = 0;
+}
+
+static struct event_constraint *
+__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
+                           u64 (*cbox_filter_mask)(int fields))
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       int i, alloc = 0;
+       unsigned long flags;
+       u64 mask;
+
+       if (reg1->idx == EXTRA_REG_NONE)
+               return NULL;
+
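+       /*
+        * The box filter MSR is shared between events; usage of each of
+        * its (up to five) fields is tracked in er->ref as packed 6-bit
+        * reference counts.
+        */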
+       raw_spin_lock_irqsave(&er->lock, flags);
+       for (i = 0; i < 5; i++) {
+               if (!(reg1->idx & (0x1 << i)))
+                       continue;
+               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+                       continue;
+
+               mask = cbox_filter_mask(0x1 << i);
+               if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
+                   !((reg1->config ^ er->config) & mask)) {
+                       atomic_add(1 << (i * 6), &er->ref);
+                       er->config &= ~mask;
+                       er->config |= reg1->config & mask;
+                       alloc |= (0x1 << i);
+               } else {
+                       break;
+               }
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+       if (i < 5)
+               goto fail;
+
+       if (!uncore_box_is_fake(box))
+               reg1->alloc |= alloc;
+
+       return NULL;
+fail:
+       for (; i >= 0; i--) {
+               if (alloc & (0x1 << i))
+                       atomic_sub(1 << (i * 6), &er->ref);
+       }
+       return &uncore_constraint_empty;
+}
+
+static u64 snbep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x4)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+
+       return mask;
+}
+
+static struct event_constraint *
+snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
+}
+
+static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_cbox_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_cbox_hw_config,
+       .get_constraint         = snbep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = snbep_uncore_cbox_constraints,
+       .ops                    = &snbep_uncore_cbox_ops,
+       .format_group           = &snbep_uncore_cbox_format_group,
+};
+
+static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       u64 config = reg1->config;
+
+       if (new_idx > reg1->idx)
+               config <<= 8 * (new_idx - reg1->idx);
+       else
+               config >>= 8 * (reg1->idx - new_idx);
+
+       if (modify) {
+               hwc->config += new_idx - reg1->idx;
+               reg1->config = config;
+               reg1->idx = new_idx;
+       }
+       return config;
+}
+
+static struct event_constraint *
+snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+       unsigned long flags;
+       int idx = reg1->idx;
+       u64 mask, config1 = reg1->config;
+       bool ok = false;
+
+       if (reg1->idx == EXTRA_REG_NONE ||
+           (!uncore_box_is_fake(box) && reg1->alloc))
+               return NULL;
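+       /*
+        * The PCU filter register holds four 8-bit bands; try the band
+        * requested by the event first, then fall back to the others.
+        */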
+again:
+       mask = 0xffULL << (idx * 8);
+       raw_spin_lock_irqsave(&er->lock, flags);
+       if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
+           !((config1 ^ er->config) & mask)) {
+               atomic_add(1 << (idx * 8), &er->ref);
+               er->config &= ~mask;
+               er->config |= config1 & mask;
+               ok = true;
+       }
+       raw_spin_unlock_irqrestore(&er->lock, flags);
+
+       if (!ok) {
+               idx = (idx + 1) % 4;
+               if (idx != reg1->idx) {
+                       config1 = snbep_pcu_alter_er(event, idx, false);
+                       goto again;
+               }
+               return &uncore_constraint_empty;
+       }
+
+       if (!uncore_box_is_fake(box)) {
+               if (idx != reg1->idx)
+                       snbep_pcu_alter_er(event, idx, true);
+               reg1->alloc = 1;
+       }
+       return NULL;
+}
+
+static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
+
+       if (uncore_box_is_fake(box) || !reg1->alloc)
+               return;
+
+       atomic_sub(1 << (reg1->idx * 8), &er->ref);
+       reg1->alloc = 0;
+}
+
+static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
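+       /* events 0xb-0xe each select one 8-bit band of the PCU filter */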
+       if (ev_sel >= 0xb && ev_sel <= 0xe) {
+               reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+               reg1->idx = ev_sel - 0xb;
+               reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops snbep_uncore_pcu_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_pcu_ops,
+       .format_group           = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+       &snbep_uncore_ubox,
+       &snbep_uncore_cbox,
+       &snbep_uncore_pcu,
+       NULL,
+};
+
+void snbep_uncore_cpu_init(void)
+{
+       if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = snbep_msr_uncores;
+}
+
+enum {
+       SNBEP_PCI_QPI_PORT0_FILTER,
+       SNBEP_PCI_QPI_PORT1_FILTER,
+       HSWEP_PCI_PCU_3,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+       if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+               reg1->idx = 0;
+               reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+               reg1->config = event->attr.config1;
+               reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+               reg2->config = event->attr.config2;
+       }
+       return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
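+       /*
+        * The QPI match/mask filter registers live on a separate PCI
+        * device (tracked in uncore_extra_pci_dev); program them before
+        * enabling the event itself.
+        */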
+       if (reg1->idx != EXTRA_REG_NONE) {
+               int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+               int pkg = topology_phys_to_logical_pkg(box->pci_phys_id);
+               struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+
+               if (filter_pdev) {
+                       pci_write_config_dword(filter_pdev, reg1->reg,
+                                               (u32)reg1->config);
+                       pci_write_config_dword(filter_pdev, reg1->reg + 4,
+                                               (u32)(reg1->config >> 32));
+                       pci_write_config_dword(filter_pdev, reg2->reg,
+                                               (u32)reg2->config);
+                       pci_write_config_dword(filter_pdev, reg2->reg + 4,
+                                               (u32)(reg2->config >> 32));
+               }
+       }
+
+       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+       .enable_event           = snbep_qpi_enable_event,
+       .hw_config              = snbep_qpi_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT()                         \
+       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
+       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
+       .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,            \
+       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
+       .ops            = &snbep_uncore_pci_ops,                \
+       .format_group   = &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 4,
+       .num_boxes      = 4,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = snbep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .event_descs            = snbep_uncore_qpi_events,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       SNBEP_PCI_UNCORE_HA,
+       SNBEP_PCI_UNCORE_IMC,
+       SNBEP_PCI_UNCORE_QPI,
+       SNBEP_PCI_UNCORE_R2PCIE,
+       SNBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+       [SNBEP_PCI_UNCORE_HA]           = &snbep_uncore_ha,
+       [SNBEP_PCI_UNCORE_IMC]          = &snbep_uncore_imc,
+       [SNBEP_PCI_UNCORE_QPI]          = &snbep_uncore_qpi,
+       [SNBEP_PCI_UNCORE_R2PCIE]       = &snbep_uncore_r2pcie,
+       [SNBEP_PCI_UNCORE_R3QPI]        = &snbep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id snbep_uncore_pci_ids[] = {
+       { /* Home Agent */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
+       },
+       { /* MC Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* QPI Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+       .name           = "snbep_uncore",
+       .id_table       = snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
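+ * Each UBOX device exposes the local Node ID at config offset 0x40 and the
+ * Node ID mapping at offset 0x54; together they give the physical package
+ * id for every bus that carries a UBOX.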
+ */
+static int snbep_pci2phy_map_init(int devid)
+{
+       struct pci_dev *ubox_dev = NULL;
+       int i, bus, nodeid, segment;
+       struct pci2phy_map *map;
+       int err = 0;
+       u32 config = 0;
+
+       while (1) {
+               /* find the UBOX device */
+               ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
+               if (!ubox_dev)
+                       break;
+               bus = ubox_dev->bus->number;
+               /* get the Node ID of the local socket */
+               err = pci_read_config_dword(ubox_dev, 0x40, &config);
+               if (err)
+                       break;
+               nodeid = config;
+               /* get the Node ID mapping */
+               err = pci_read_config_dword(ubox_dev, 0x54, &config);
+               if (err)
+                       break;
+
+               segment = pci_domain_nr(ubox_dev->bus);
+               raw_spin_lock(&pci2phy_map_lock);
+               map = __find_pci2phy_map(segment);
+               if (!map) {
+                       raw_spin_unlock(&pci2phy_map_lock);
+                       err = -ENOMEM;
+                       break;
+               }
+
+               /*
+                * Each three-bit field in the Node ID mapping register
+                * maps to a particular node.
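+                * For example, if nodeid read at offset 0x40 is 1 and
+                * bits [5:3] of the mapping register are also 1, the bus
+                * is assigned physical id 1.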
+                */
+               for (i = 0; i < 8; i++) {
+                       if (nodeid == ((config >> (3 * i)) & 0x7)) {
+                               map->pbus_to_physid[bus] = i;
+                               break;
+                       }
+               }
+               raw_spin_unlock(&pci2phy_map_lock);
+       }
+
+       if (!err) {
+               /*
+                * For PCI bus with no UBOX device, find the next bus
+                * that has UBOX device and use its mapping.
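+                * Buses are walked from 255 down to 0, so a bus without a
+                * UBOX inherits the id of the nearest higher-numbered bus
+                * that has one.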
+                */
+               raw_spin_lock(&pci2phy_map_lock);
+               list_for_each_entry(map, &pci2phy_map_head, list) {
+                       i = -1;
+                       for (bus = 255; bus >= 0; bus--) {
+                               if (map->pbus_to_physid[bus] >= 0)
+                                       i = map->pbus_to_physid[bus];
+                               else
+                                       map->pbus_to_physid[bus] = i;
+                       }
+               }
+               raw_spin_unlock(&pci2phy_map_lock);
+       }
+
+       pci_dev_put(ubox_dev);
+
+       return err ? pcibios_err_to_errno(err) : 0;
+}
+
+int snbep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x3ce0);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = snbep_pci_uncores;
+       uncore_pci_driver = &snbep_uncore_pci_driver;
+       return 0;
+}
+/* end of Sandy Bridge-EP uncore support */
+
+/* IvyTown uncore support */
+static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+       if (msr)
+               wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+
+       pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
+       .init_box       = ivbep_uncore_msr_init_box,            \
+       .disable_box    = snbep_uncore_msr_disable_box,         \
+       .enable_box     = snbep_uncore_msr_enable_box,          \
+       .disable_event  = snbep_uncore_msr_disable_event,       \
+       .enable_event   = snbep_uncore_msr_enable_event,        \
+       .read_counter   = uncore_msr_read_counter
+
+static struct intel_uncore_ops ivbep_uncore_msr_ops = {
+       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+};
+
+static struct intel_uncore_ops ivbep_uncore_pci_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = snbep_uncore_pci_disable_event,
+       .enable_event   = snbep_uncore_pci_enable_event,
+       .read_counter   = snbep_uncore_pci_read_counter,
+};
+
+#define IVBEP_UNCORE_PCI_COMMON_INIT()                         \
+       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
+       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
+       .event_mask     = IVBEP_PMON_RAW_EVENT_MASK,            \
+       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
+       .ops            = &ivbep_uncore_pci_ops,                        \
+       .format_group   = &ivbep_uncore_format_group
+
+static struct attribute *ivbep_uncore_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid.attr,
+       &format_attr_filter_link.attr,
+       &format_attr_filter_state2.attr,
+       &format_attr_filter_nid2.attr,
+       &format_attr_filter_opc2.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_c6.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge.attr,
+       &format_attr_filter_band0.attr,
+       &format_attr_filter_band1.attr,
+       &format_attr_filter_band2.attr,
+       &format_attr_filter_band3.attr,
+       NULL,
+};
+
+static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
+       &format_attr_event_ext.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_match_rds.attr,
+       &format_attr_match_rnid30.attr,
+       &format_attr_match_rnid4.attr,
+       &format_attr_match_dnid.attr,
+       &format_attr_match_mc.attr,
+       &format_attr_match_opc.attr,
+       &format_attr_match_vnw.attr,
+       &format_attr_match0.attr,
+       &format_attr_match1.attr,
+       &format_attr_mask_rds.attr,
+       &format_attr_mask_rnid30.attr,
+       &format_attr_mask_rnid4.attr,
+       &format_attr_mask_dnid.attr,
+       &format_attr_mask_mc.attr,
+       &format_attr_mask_opc.attr,
+       &format_attr_mask_vnw.attr,
+       &format_attr_mask0.attr,
+       &format_attr_mask1.attr,
+       NULL,
+};
+
+static struct attribute_group ivbep_uncore_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group ivbep_uncore_qpi_format_group = {
+       .name = "format",
+       .attrs = ivbep_uncore_qpi_formats_attr,
+};
+
+static struct intel_uncore_type ivbep_uncore_ubox = {
+       .name           = "ubox",
+       .num_counters   = 2,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .fixed_ctr_bits = 48,
+       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
+       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
+       .event_mask     = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops            = &ivbep_uncore_msr_ops,
+       .format_group   = &ivbep_uncore_ubox_format_group,
+};
+
+static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+       EVENT_EXTRA_END
+};
+
+static u64 ivbep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+       if (fields & 0x4)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x10) {
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
+               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+       }
+
+       return mask;
+}
+
+static struct event_constraint *
+ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
+}
+
+static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               u64 filter = uncore_shared_reg_config(box, 0);
+               wrmsrl(reg1->reg, filter & 0xffffffff);
+               wrmsrl(reg1->reg + 6, filter >> 32);
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
+       .init_box               = ivbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = ivbep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = ivbep_cbox_hw_config,
+       .get_constraint         = ivbep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 15,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
+       .event_mask             = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = snbep_uncore_cbox_constraints,
+       .ops                    = &ivbep_uncore_cbox_ops,
+       .format_group           = &ivbep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
+       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = snbep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_pcu_ops,
+       .format_group           = &ivbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *ivbep_msr_uncores[] = {
+       &ivbep_uncore_ubox,
+       &ivbep_uncore_cbox,
+       &ivbep_uncore_pcu,
+       NULL,
+};
+
+void ivbep_uncore_cpu_init(void)
+{
+       if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = ivbep_msr_uncores;
+}
+
+static struct intel_uncore_type ivbep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 4,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = snbep_uncore_imc_events,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+/* registers in IRP boxes are not properly aligned */
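+/* hence the explicit per-counter control and counter offset tables below */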
+static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
+static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
+
+static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
+                              hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
+}
+
+static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
+       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+       return count;
+}
+
+static struct intel_uncore_ops ivbep_uncore_irp_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = ivbep_uncore_irp_disable_event,
+       .enable_event   = ivbep_uncore_irp_enable_event,
+       .read_counter   = ivbep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type ivbep_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = IVBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &ivbep_uncore_irp_ops,
+       .format_group           = &ivbep_uncore_format_group,
+};
+
+static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
+       .init_box       = ivbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = snbep_uncore_pci_disable_event,
+       .enable_event   = snbep_qpi_enable_event,
+       .read_counter   = snbep_uncore_pci_read_counter,
+       .hw_config      = snbep_qpi_hw_config,
+       .get_constraint = uncore_get_constraint,
+       .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type ivbep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_qpi_ops,
+       .format_group           = &ivbep_uncore_qpi_format_group,
+};
+
+static struct intel_uncore_type ivbep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r2pcie_constraints,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type ivbep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 44,
+       .constraints    = snbep_uncore_r3qpi_constraints,
+       IVBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       IVBEP_PCI_UNCORE_HA,
+       IVBEP_PCI_UNCORE_IMC,
+       IVBEP_PCI_UNCORE_IRP,
+       IVBEP_PCI_UNCORE_QPI,
+       IVBEP_PCI_UNCORE_R2PCIE,
+       IVBEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *ivbep_pci_uncores[] = {
+       [IVBEP_PCI_UNCORE_HA]   = &ivbep_uncore_ha,
+       [IVBEP_PCI_UNCORE_IMC]  = &ivbep_uncore_imc,
+       [IVBEP_PCI_UNCORE_IRP]  = &ivbep_uncore_irp,
+       [IVBEP_PCI_UNCORE_QPI]  = &ivbep_uncore_qpi,
+       [IVBEP_PCI_UNCORE_R2PCIE]       = &ivbep_uncore_r2pcie,
+       [IVBEP_PCI_UNCORE_R3QPI]        = &ivbep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id ivbep_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 4 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 4 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver ivbep_uncore_pci_driver = {
+       .name           = "ivbep_uncore",
+       .id_table       = ivbep_uncore_pci_ids,
+};
+
+int ivbep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x0e1e);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = ivbep_pci_uncores;
+       uncore_pci_driver = &ivbep_uncore_pci_driver;
+       return 0;
+}
+/* end of IvyTown uncore support */
+
+/* KNL uncore support */
+static struct attribute *knl_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = KNL_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .ops                    = &snbep_uncore_msr_ops,
+       .format_group           = &knl_uncore_ubox_format_group,
+};
+
+static struct attribute *knl_uncore_cha_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_qor.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid4.attr,
+       &format_attr_filter_link3.attr,
+       &format_attr_filter_state4.attr,
+       &format_attr_filter_local.attr,
+       &format_attr_filter_all_op.attr,
+       &format_attr_filter_nnm.attr,
+       &format_attr_filter_opc3.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_cha_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_cha_formats_attr,
+};
+
+static struct event_constraint knl_uncore_cha_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg knl_uncore_cha_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
+       EVENT_EXTRA_END
+};
+
+static u64 knl_cha_filter_mask(int fields)
+{
+       u64 mask = 0;
+
+       if (fields & 0x1)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x4)
+               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
+       return mask;
+}
+
+static struct event_constraint *
+knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
+}
+
+static int knl_cha_hw_config(struct intel_uncore_box *box,
+                            struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+                           KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+                                   struct perf_event *event);
+
+static struct intel_uncore_ops knl_uncore_cha_ops = {
+       .init_box               = snbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = hswep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = knl_cha_hw_config,
+       .get_constraint         = knl_cha_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type knl_uncore_cha = {
+       .name                   = "cha",
+       .num_counters           = 4,
+       .num_boxes              = 38,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = KNL_CHA_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = knl_uncore_cha_constraints,
+       .ops                    = &knl_uncore_cha_ops,
+       .format_group           = &knl_uncore_cha_format_group,
+};
+
+static struct attribute *knl_uncore_pcu_formats_attr[] = {
+       &format_attr_event2.attr,
+       &format_attr_use_occ_ctr.attr,
+       &format_attr_occ_sel.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh6.attr,
+       &format_attr_occ_invert.attr,
+       &format_attr_occ_edge_det.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_pcu_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_pcu_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
+       .ops                    = &snbep_uncore_msr_ops,
+       .format_group           = &knl_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *knl_msr_uncores[] = {
+       &knl_uncore_ubox,
+       &knl_uncore_cha,
+       &knl_uncore_pcu,
+       NULL,
+};
+
+void knl_uncore_cpu_init(void)
+{
+       uncore_msr_uncores = knl_msr_uncores;
+}
+
+static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       int box_ctl = uncore_pci_box_ctl(box);
+
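+       /* the box is enabled by clearing its control register */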
+       pci_write_config_dword(pdev, box_ctl, 0);
+}
+
+static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
+                                       struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+
+       if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
+                                                       == UNCORE_FIXED_EVENT)
+               pci_write_config_dword(pdev, hwc->config_base,
+                                      hwc->config | KNL_PMON_FIXED_CTL_EN);
+       else
+               pci_write_config_dword(pdev, hwc->config_base,
+                                      hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops knl_uncore_imc_ops = {
+       .init_box       = snbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = knl_uncore_imc_enable_box,
+       .read_counter   = snbep_uncore_pci_read_counter,
+       .enable_event   = knl_uncore_imc_enable_event,
+       .disable_event  = snbep_uncore_pci_disable_event,
+};
+
+static struct intel_uncore_type knl_uncore_imc_uclk = {
+       .name                   = "imc_uclk",
+       .num_counters           = 4,
+       .num_boxes              = 2,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_imc_dclk = {
+       .name                   = "imc",
+       .num_counters           = 4,
+       .num_boxes              = 6,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_MC0_CH0_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
+       .fixed_ctl              = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
+       .box_ctl                = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_uclk = {
+       .name                   = "edc_uclk",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
+       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
+       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type knl_uncore_edc_eclk = {
+       .name                   = "edc_eclk",
+       .num_counters           = 4,
+       .num_boxes              = 8,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
+       .event_ctl              = KNL_EDC0_ECLK_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
+       .fixed_ctl              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
+       .box_ctl                = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
+       .ops                    = &knl_uncore_imc_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct event_constraint knl_uncore_m2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type knl_uncore_m2pcie = {
+       .name           = "m2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = knl_uncore_m2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct attribute *knl_uncore_irp_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_qor.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group knl_uncore_irp_format_group = {
+       .name = "format",
+       .attrs = knl_uncore_irp_formats_attr,
+};
+
+static struct intel_uncore_type knl_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = KNL_IRP_PCI_PMON_BOX_CTL,
+       .ops                    = &snbep_uncore_pci_ops,
+       .format_group           = &knl_uncore_irp_format_group,
+};
+
+enum {
+       KNL_PCI_UNCORE_MC_UCLK,
+       KNL_PCI_UNCORE_MC_DCLK,
+       KNL_PCI_UNCORE_EDC_UCLK,
+       KNL_PCI_UNCORE_EDC_ECLK,
+       KNL_PCI_UNCORE_M2PCIE,
+       KNL_PCI_UNCORE_IRP,
+};
+
+static struct intel_uncore_type *knl_pci_uncores[] = {
+       [KNL_PCI_UNCORE_MC_UCLK]        = &knl_uncore_imc_uclk,
+       [KNL_PCI_UNCORE_MC_DCLK]        = &knl_uncore_imc_dclk,
+       [KNL_PCI_UNCORE_EDC_UCLK]       = &knl_uncore_edc_uclk,
+       [KNL_PCI_UNCORE_EDC_ECLK]       = &knl_uncore_edc_eclk,
+       [KNL_PCI_UNCORE_M2PCIE]         = &knl_uncore_m2pcie,
+       [KNL_PCI_UNCORE_IRP]            = &knl_uncore_irp,
+       NULL,
+};
+
+/*
+ * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
+ * device type. Prior to KNL, each instance of a PMU device type had a unique
+ * device ID.
+ *
+ *     PCI Device ID   Uncore PMU Devices
+ *     ----------------------------------
+ *     0x7841          MC0 UClk, MC1 UClk
+ *     0x7843          MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
+ *                     MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
+ *     0x7833          EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
+ *                     EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
+ *     0x7835          EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
+ *                     EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
+ *     0x7817          M2PCIe
+ *     0x7814          IRP
+ */
+
+static const struct pci_device_id knl_uncore_pci_ids[] = {
+       { /* MC UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
+       },
+       { /* MC DClk Channel */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
+       },
+       { /* EDC UClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
+       },
+       { /* EDC EClk */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
+       },
+       { /* M2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
+               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
+       },
+       { /* end: all zeroes */ }
+};
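+
+/*
+ * Note: UNCORE_PCI_DEV_DATA() packs the knl_pci_uncores[] type index and the
+ * instance number into pci_device_id.driver_data (type in the upper byte,
+ * instance in the lower byte in the common uncore code), which is how a
+ * probed PCI function is matched back to its PMU type.  Because KNL shares a
+ * single device ID across all instances of a type, every entry above uses
+ * instance 0.
+ */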
+
+static struct pci_driver knl_uncore_pci_driver = {
+       .name           = "knl_uncore",
+       .id_table       = knl_uncore_pci_ids,
+};
+
+int knl_uncore_pci_init(void)
+{
+       int ret;
+
+       /* All KNL PCI-based PMON units are on the same PCI bus, except IRP */
+       ret = snb_pci2phy_map_init(0x7814); /* IRP */
+       if (ret)
+               return ret;
+       ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
+       if (ret)
+               return ret;
+       uncore_pci_uncores = knl_pci_uncores;
+       uncore_pci_driver = &knl_uncore_pci_driver;
+       return 0;
+}
+
+/* end of KNL uncore support */
+
+/* Haswell-EP uncore support */
+static struct attribute *hswep_uncore_ubox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh5.attr,
+       &format_attr_filter_tid2.attr,
+       &format_attr_filter_cid.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_ubox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_ubox_formats_attr,
+};
+
+static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       reg1->reg = HSWEP_U_MSR_PMON_FILTER;
+       reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
+       reg1->idx = 0;
+       return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_ubox_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = hswep_ubox_hw_config,
+       .get_constraint         = uncore_get_constraint,
+       .put_constraint         = uncore_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 44,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &hswep_uncore_ubox_ops,
+       .format_group           = &hswep_uncore_ubox_format_group,
+};
+
+static struct attribute *hswep_uncore_cbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_thresh8.attr,
+       &format_attr_filter_tid3.attr,
+       &format_attr_filter_link2.attr,
+       &format_attr_filter_state3.attr,
+       &format_attr_filter_nid2.attr,
+       &format_attr_filter_opc2.attr,
+       &format_attr_filter_nc.attr,
+       &format_attr_filter_c6.attr,
+       &format_attr_filter_isoc.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_cbox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_cbox_formats_attr,
+};
+
+static struct event_constraint hswep_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
+       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
+                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x402a, 0x40ff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
+       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
+       EVENT_EXTRA_END
+};
+
+static u64 hswep_cbox_filter_mask(int fields)
+{
+       u64 mask = 0;
+       if (fields & 0x1)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
+       if (fields & 0x2)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
+       if (fields & 0x4)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
+       if (fields & 0x8)
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
+       if (fields & 0x10) {
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
+               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
+       }
+       return mask;
+}
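+
+/*
+ * Example: the extra_regs table above tags event 0x4135 with idx 0x18
+ * (NID | OPC group), so hswep_cbox_filter_mask(0x18) returns the NID bit
+ * plus the OPC/NC/C6/ISOC bits, and only those filter fields are then
+ * taken from attr.config1 in hswep_cbox_hw_config() below.
+ */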
+
+static struct event_constraint *
+hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+       return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
+}
+
+static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+       struct extra_reg *er;
+       int idx = 0;
+
+       for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
+               if (er->event != (event->hw.config & er->config_mask))
+                       continue;
+               idx |= er->idx;
+       }
+
+       if (idx) {
+               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+                           HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+               reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
+               reg1->idx = idx;
+       }
+       return 0;
+}
+
+static void hswep_cbox_enable_event(struct intel_uncore_box *box,
+                                 struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+       if (reg1->idx != EXTRA_REG_NONE) {
+               u64 filter = uncore_shared_reg_config(box, 0);
+               wrmsrl(reg1->reg, filter & 0xffffffff);
+               wrmsrl(reg1->reg + 1, filter >> 32);
+       }
+
+       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops hswep_uncore_cbox_ops = {
+       .init_box               = snbep_uncore_msr_init_box,
+       .disable_box            = snbep_uncore_msr_disable_box,
+       .enable_box             = snbep_uncore_msr_enable_box,
+       .disable_event          = snbep_uncore_msr_disable_event,
+       .enable_event           = hswep_cbox_enable_event,
+       .read_counter           = uncore_msr_read_counter,
+       .hw_config              = hswep_cbox_hw_config,
+       .get_constraint         = hswep_cbox_get_constraint,
+       .put_constraint         = snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 18,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = hswep_uncore_cbox_constraints,
+       .ops                    = &hswep_uncore_cbox_ops,
+       .format_group           = &hswep_uncore_cbox_format_group,
+};
+
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+       unsigned msr = uncore_msr_box_ctl(box);
+
+       if (msr) {
+               u64 init = SNBEP_PMON_BOX_CTL_INT;
+               u64 flags = 0;
+               int i;
+
+               for_each_set_bit(i, (unsigned long *)&init, 64) {
+                       flags |= (1ULL << i);
+                       wrmsrl(msr, flags);
+               }
+       }
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .init_box               = hswep_uncore_sbox_msr_init_box,
+};
+
+static struct attribute *hswep_uncore_sbox_formats_attr[] = {
+       &format_attr_event.attr,
+       &format_attr_umask.attr,
+       &format_attr_edge.attr,
+       &format_attr_tid_en.attr,
+       &format_attr_inv.attr,
+       &format_attr_thresh8.attr,
+       NULL,
+};
+
+static struct attribute_group hswep_uncore_sbox_format_group = {
+       .name = "format",
+       .attrs = hswep_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_type hswep_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 4,
+       .perf_ctr_bits          = 44,
+       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
+       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
+       .ops                    = &hswep_uncore_sbox_msr_ops,
+       .format_group           = &hswep_uncore_sbox_format_group,
+};
+
+static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
+
+       if (ev_sel >= 0xb && ev_sel <= 0xe) {
+               reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
+               reg1->idx = ev_sel - 0xb;
+               reg1->config = event->attr.config1 & (0xff << reg1->idx);
+       }
+       return 0;
+}
+
+static struct intel_uncore_ops hswep_uncore_pcu_ops = {
+       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+       .hw_config              = hswep_pcu_hw_config,
+       .get_constraint         = snbep_pcu_get_constraint,
+       .put_constraint         = snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type hswep_uncore_pcu = {
+       .name                   = "pcu",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &hswep_uncore_pcu_ops,
+       .format_group           = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *hswep_msr_uncores[] = {
+       &hswep_uncore_ubox,
+       &hswep_uncore_cbox,
+       &hswep_uncore_sbox,
+       &hswep_uncore_pcu,
+       NULL,
+};
+
+void hswep_uncore_cpu_init(void)
+{
+       int pkg = topology_phys_to_logical_pkg(0);
+
+       if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+       /* Detect 6-8 core systems with only two SBOXes */
+       if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+               u32 capid4;
+
+               pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3],
+                                     0x94, &capid4);
+               if (((capid4 >> 6) & 0x3) == 0)
+                       hswep_uncore_sbox.num_boxes = 2;
+       }
+
+       uncore_msr_uncores = hswep_msr_uncores;
+}
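+
+/*
+ * Note on the CAPID4 probe above: config offset 0x94 on the PCU.3 device is
+ * read purely as a capability check; a value of 0 in bits 7:6 identifies the
+ * 6-8 core parts, which only carry two SBOXes, so the sbox type is trimmed
+ * to match before registration.
+ */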
+
+static struct intel_uncore_type hswep_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 5,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct uncore_event_desc hswep_uncore_imc_events[] = {
+       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0x00,umask=0x00"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+       { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type hswep_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 5,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = hswep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+       struct pci_dev *pdev = box->pci_dev;
+       struct hw_perf_event *hwc = &event->hw;
+       u64 count = 0;
+
+       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+       return count;
+}
+
+static struct intel_uncore_ops hswep_uncore_irp_ops = {
+       .init_box       = snbep_uncore_pci_init_box,
+       .disable_box    = snbep_uncore_pci_disable_box,
+       .enable_box     = snbep_uncore_pci_enable_box,
+       .disable_event  = ivbep_uncore_irp_disable_event,
+       .enable_event   = ivbep_uncore_irp_enable_event,
+       .read_counter   = hswep_uncore_irp_read_counter,
+};
+
+static struct intel_uncore_type hswep_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &hswep_uncore_irp_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type hswep_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 5,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = hswep_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type hswep_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 4,
+       .num_boxes      = 3,
+       .perf_ctr_bits  = 44,
+       .constraints    = hswep_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       HSWEP_PCI_UNCORE_HA,
+       HSWEP_PCI_UNCORE_IMC,
+       HSWEP_PCI_UNCORE_IRP,
+       HSWEP_PCI_UNCORE_QPI,
+       HSWEP_PCI_UNCORE_R2PCIE,
+       HSWEP_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *hswep_pci_uncores[] = {
+       [HSWEP_PCI_UNCORE_HA]   = &hswep_uncore_ha,
+       [HSWEP_PCI_UNCORE_IMC]  = &hswep_uncore_imc,
+       [HSWEP_PCI_UNCORE_IRP]  = &hswep_uncore_irp,
+       [HSWEP_PCI_UNCORE_QPI]  = &hswep_uncore_qpi,
+       [HSWEP_PCI_UNCORE_R2PCIE]       = &hswep_uncore_r2pcie,
+       [HSWEP_PCI_UNCORE_R3QPI]        = &hswep_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id hswep_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT0_FILTER),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  SNBEP_PCI_QPI_PORT1_FILTER),
+       },
+       { /* PCU.3 (for Capability registers) */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+                                                  HSWEP_PCI_PCU_3),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver hswep_uncore_pci_driver = {
+       .name           = "hswep_uncore",
+       .id_table       = hswep_uncore_pci_ids,
+};
+
+int hswep_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x2f1e);
+       if (ret)
+               return ret;
+       uncore_pci_uncores = hswep_pci_uncores;
+       uncore_pci_driver = &hswep_uncore_pci_driver;
+       return 0;
+}
+/* end of Haswell-EP uncore support */
+
+/* BDX uncore support */
+
+static struct intel_uncore_type bdx_uncore_ubox = {
+       .name                   = "ubox",
+       .num_counters           = 2,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .fixed_ctr_bits         = 48,
+       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
+       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
+       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &ivbep_uncore_msr_ops,
+       .format_group           = &ivbep_uncore_ubox_format_group,
+};
+
+static struct event_constraint bdx_uncore_cbox_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_cbox = {
+       .name                   = "cbox",
+       .num_counters           = 4,
+       .num_boxes              = 24,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
+       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
+       .num_shared_regs        = 1,
+       .constraints            = bdx_uncore_cbox_constraints,
+       .ops                    = &hswep_uncore_cbox_ops,
+       .format_group           = &hswep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_sbox = {
+       .name                   = "sbox",
+       .num_counters           = 4,
+       .num_boxes              = 4,
+       .perf_ctr_bits          = 48,
+       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
+       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
+       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
+       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
+       .ops                    = &hswep_uncore_sbox_msr_ops,
+       .format_group           = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX    3
+
+static struct intel_uncore_type *bdx_msr_uncores[] = {
+       &bdx_uncore_ubox,
+       &bdx_uncore_cbox,
+       &hswep_uncore_pcu,
+       &bdx_uncore_sbox,
+       NULL,
+};
+
+void bdx_uncore_cpu_init(void)
+{
+       if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+               bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+       uncore_msr_uncores = bdx_msr_uncores;
+
+       /* BDX-DE doesn't have SBOX */
+       if (boot_cpu_data.x86_model == 86)
+               uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+}
+
+static struct intel_uncore_type bdx_uncore_ha = {
+       .name           = "ha",
+       .num_counters   = 4,
+       .num_boxes      = 2,
+       .perf_ctr_bits  = 48,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_imc = {
+       .name           = "imc",
+       .num_counters   = 5,
+       .num_boxes      = 8,
+       .perf_ctr_bits  = 48,
+       .fixed_ctr_bits = 48,
+       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+       .event_descs    = hswep_uncore_imc_events,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type bdx_uncore_irp = {
+       .name                   = "irp",
+       .num_counters           = 4,
+       .num_boxes              = 1,
+       .perf_ctr_bits          = 48,
+       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .ops                    = &hswep_uncore_irp_ops,
+       .format_group           = &snbep_uncore_format_group,
+};
+
+static struct intel_uncore_type bdx_uncore_qpi = {
+       .name                   = "qpi",
+       .num_counters           = 4,
+       .num_boxes              = 3,
+       .perf_ctr_bits          = 48,
+       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
+       .event_ctl              = SNBEP_PCI_PMON_CTL0,
+       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
+       .num_shared_regs        = 1,
+       .ops                    = &snbep_uncore_qpi_ops,
+       .format_group           = &snbep_uncore_qpi_format_group,
+};
+
+static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r2pcie = {
+       .name           = "r2pcie",
+       .num_counters   = 4,
+       .num_boxes      = 1,
+       .perf_ctr_bits  = 48,
+       .constraints    = bdx_uncore_r2pcie_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
+       UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
+       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+       EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type bdx_uncore_r3qpi = {
+       .name           = "r3qpi",
+       .num_counters   = 3,
+       .num_boxes      = 3,
+       .perf_ctr_bits  = 48,
+       .constraints    = bdx_uncore_r3qpi_constraints,
+       SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+enum {
+       BDX_PCI_UNCORE_HA,
+       BDX_PCI_UNCORE_IMC,
+       BDX_PCI_UNCORE_IRP,
+       BDX_PCI_UNCORE_QPI,
+       BDX_PCI_UNCORE_R2PCIE,
+       BDX_PCI_UNCORE_R3QPI,
+};
+
+static struct intel_uncore_type *bdx_pci_uncores[] = {
+       [BDX_PCI_UNCORE_HA]     = &bdx_uncore_ha,
+       [BDX_PCI_UNCORE_IMC]    = &bdx_uncore_imc,
+       [BDX_PCI_UNCORE_IRP]    = &bdx_uncore_irp,
+       [BDX_PCI_UNCORE_QPI]    = &bdx_uncore_qpi,
+       [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
+       [BDX_PCI_UNCORE_R3QPI]  = &bdx_uncore_r3qpi,
+       NULL,
+};
+
+static const struct pci_device_id bdx_uncore_pci_ids[] = {
+       { /* Home Agent 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
+       },
+       { /* Home Agent 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
+       },
+       { /* MC0 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
+       },
+       { /* MC0 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
+       },
+       { /* MC0 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
+       },
+       { /* MC0 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
+       },
+       { /* MC1 Channel 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
+       },
+       { /* MC1 Channel 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
+       },
+       { /* MC1 Channel 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
+       },
+       { /* MC1 Channel 3 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
+       },
+       { /* IRP */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
+       },
+       { /* QPI0 Port 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
+       },
+       { /* QPI0 Port 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
+       },
+       { /* QPI1 Port 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
+       },
+       { /* R2PCIe */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
+       },
+       { /* R3QPI0 Link 0 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
+       },
+       { /* R3QPI0 Link 1 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
+       },
+       { /* R3QPI1 Link 2 */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
+               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
+       },
+       { /* QPI Port 0 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+       },
+       { /* QPI Port 1 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+       },
+       { /* QPI Port 2 filter  */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
+               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+       },
+       { /* end: all zeroes */ }
+};
+
+static struct pci_driver bdx_uncore_pci_driver = {
+       .name           = "bdx_uncore",
+       .id_table       = bdx_uncore_pci_ids,
+};
+
+int bdx_uncore_pci_init(void)
+{
+       int ret = snbep_pci2phy_map_init(0x6f1e);
+
+       if (ret)
+               return ret;
+       uncore_pci_uncores = bdx_pci_uncores;
+       uncore_pci_driver = &bdx_uncore_pci_driver;
+       return 0;
+}
+
+/* end of BDX uncore support */
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
new file mode 100644 (file)
index 0000000..ec863b9
--- /dev/null
@@ -0,0 +1,241 @@
+#include <linux/perf_event.h>
+
+enum perf_msr_id {
+       PERF_MSR_TSC                    = 0,
+       PERF_MSR_APERF                  = 1,
+       PERF_MSR_MPERF                  = 2,
+       PERF_MSR_PPERF                  = 3,
+       PERF_MSR_SMI                    = 4,
+
+       PERF_MSR_EVENT_MAX,
+};
+
+static bool test_aperfmperf(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+static bool test_intel(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_MSR_SMI)
+                       return true;
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+struct perf_msr {
+       u64     msr;
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
+};
+
+PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+
+static struct perf_msr msr[] = {
+       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
+       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
+       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
+       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
+       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
+};
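+
+/*
+ * Note: the TSC entry deliberately uses MSR number 0 and has no probe
+ * function; msr_read_counter() below treats a zero event_base as "read the
+ * TSC via rdtscll()" instead of issuing an rdmsrl(), and msr_init() never
+ * probes index PERF_MSR_TSC.
+ */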
+
+static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
+       NULL,
+};
+
+static struct attribute_group events_attr_group = {
+       .name = "events",
+       .attrs = events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *format_attrs[] = {
+       &format_attr_event.attr,
+       NULL,
+};
+static struct attribute_group format_attr_group = {
+       .name = "format",
+       .attrs = format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+       &events_attr_group,
+       &format_attr_group,
+       NULL,
+};
+
+static int msr_event_init(struct perf_event *event)
+{
+       u64 cfg = event->attr.config;
+
+       if (event->attr.type != event->pmu->type)
+               return -ENOENT;
+
+       if (cfg >= PERF_MSR_EVENT_MAX)
+               return -EINVAL;
+
+       /* unsupported modes and filters */
+       if (event->attr.exclude_user   ||
+           event->attr.exclude_kernel ||
+           event->attr.exclude_hv     ||
+           event->attr.exclude_idle   ||
+           event->attr.exclude_host   ||
+           event->attr.exclude_guest  ||
+           event->attr.sample_period) /* no sampling */
+               return -EINVAL;
+
+       if (!msr[cfg].attr)
+               return -EINVAL;
+
+       event->hw.idx = -1;
+       event->hw.event_base = msr[cfg].msr;
+       event->hw.config = cfg;
+
+       return 0;
+}
+
+static inline u64 msr_read_counter(struct perf_event *event)
+{
+       u64 now;
+
+       if (event->hw.event_base)
+               rdmsrl(event->hw.event_base, now);
+       else
+               rdtscll(now);
+
+       return now;
+}
+
+static void msr_event_update(struct perf_event *event)
+{
+       u64 prev, now;
+       s64 delta;
+
+       /* Careful, an NMI might modify the previous event value. */
+again:
+       prev = local64_read(&event->hw.prev_count);
+       now = msr_read_counter(event);
+
+       if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
+               goto again;
+
+       delta = now - prev;
+       /* MSR_SMI_COUNT is only 32 bits wide; sign-extend so a wrap is handled */
+       if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
+               delta = sign_extend64(delta, 31);
+
+       local64_add(delta, &event->count);
+}
+
+static void msr_event_start(struct perf_event *event, int flags)
+{
+       u64 now;
+
+       now = msr_read_counter(event);
+       local64_set(&event->hw.prev_count, now);
+}
+
+static void msr_event_stop(struct perf_event *event, int flags)
+{
+       msr_event_update(event);
+}
+
+static void msr_event_del(struct perf_event *event, int flags)
+{
+       msr_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int msr_event_add(struct perf_event *event, int flags)
+{
+       if (flags & PERF_EF_START)
+               msr_event_start(event, flags);
+
+       return 0;
+}
+
+static struct pmu pmu_msr = {
+       .task_ctx_nr    = perf_sw_context,
+       .attr_groups    = attr_groups,
+       .event_init     = msr_event_init,
+       .add            = msr_event_add,
+       .del            = msr_event_del,
+       .start          = msr_event_start,
+       .stop           = msr_event_stop,
+       .read           = msr_event_update,
+       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+static int __init msr_init(void)
+{
+       int i, j = 0;
+
+       if (!boot_cpu_has(X86_FEATURE_TSC)) {
+               pr_cont("no MSR PMU driver.\n");
+               return 0;
+       }
+
+       /* Probe the MSRs. */
+       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+               u64 val;
+
+               /*
+                * Virt sucks arse; you cannot tell whether an R/O MSR is present :/
+                */
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
+       }
+
+       /* List remaining MSRs in the sysfs attrs. */
+       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
+       }
+       events_attrs[j] = NULL;
+
+       perf_pmu_register(&pmu_msr, "msr", -1);
+
+       return 0;
+}
+device_initcall(msr_init);
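+
+/*
+ * Usage sketch (illustrative only): once registered, this PMU appears as
+ * /sys/bus/event_source/devices/msr/, with the surviving events listed under
+ * its events/ directory, and can be counted system-wide with the perf tool:
+ *
+ *     perf stat -e msr/tsc/,msr/aperf/,msr/mperf/ -a sleep 1
+ *
+ * Events whose probe failed above (attr set to NULL) are not exported and
+ * cannot be requested by name.
+ */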
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
new file mode 100644 (file)
index 0000000..68155ca
--- /dev/null
@@ -0,0 +1,960 @@
+/*
+ * Performance events x86 architecture header
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+/* To enable MSR tracing please use the generic trace points. */
+
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+       EXTRA_REG_NONE  = -1,   /* not used */
+
+       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+       EXTRA_REG_LBR   = 2,    /* lbr_select */
+       EXTRA_REG_LDLAT = 3,    /* ld_lat_threshold */
+       EXTRA_REG_FE    = 4,    /* fe_* */
+
+       EXTRA_REG_MAX           /* number of entries needed */
+};
+
+struct event_constraint {
+       union {
+               unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+               u64             idxmsk64;
+       };
+       u64     code;
+       u64     cmask;
+       int     weight;
+       int     overlap;
+       int     flags;
+};
+/*
+ * struct hw_perf_event.flags flags
+ */
+#define PERF_X86_EVENT_PEBS_LDLAT      0x0001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST         0x0002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW     0x0004 /* haswell style datala, store */
+#define PERF_X86_EVENT_COMMITTED       0x0008 /* event passed commit_txn */
+#define PERF_X86_EVENT_PEBS_LD_HSW     0x0010 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW     0x0020 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL            0x0040 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC         0x0080 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED   0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT       0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD     0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING     0x0800 /* use freerunning PEBS */
+
+
+struct amd_nb {
+       int nb_id;  /* NorthBridge id */
+       int refcnt; /* reference count */
+       struct perf_event *owners[X86_PMC_IDX_MAX];
+       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS                8
+
+/*
+ * Flags PEBS can handle without a PMI.
+ *
+ * TID can only be handled by flushing at context switch.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
+       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+       PERF_SAMPLE_TRANSACTION)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+       u64     bts_buffer_base;
+       u64     bts_index;
+       u64     bts_absolute_maximum;
+       u64     bts_interrupt_threshold;
+       u64     pebs_buffer_base;
+       u64     pebs_index;
+       u64     pebs_absolute_maximum;
+       u64     pebs_interrupt_threshold;
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+/*
+ * Per register state.
+ */
+struct er_account {
+       raw_spinlock_t          lock;   /* per-core: protect structure */
+       u64                 config;     /* extra MSR config */
+       u64                 reg;        /* extra MSR number */
+       atomic_t            ref;        /* reference count */
+};
+
+/*
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
+ */
+struct intel_shared_regs {
+       struct er_account       regs[EXTRA_REG_MAX];
+       int                     refcnt;         /* per-core: #HT threads */
+       unsigned                core_id;        /* per-core: core id */
+};
+
+enum intel_excl_state_type {
+       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
+       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
+       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
+};
+
+struct intel_excl_states {
+       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
+       bool sched_started; /* true if scheduling has started */
+};
+
+struct intel_excl_cntrs {
+       raw_spinlock_t  lock;
+
+       struct intel_excl_states states[2];
+
+       union {
+               u16     has_exclusive[2];
+               u32     exclusive_present;
+       };
+
+       int             refcnt;         /* per-core: #HT threads */
+       unsigned        core_id;        /* per-core: core id */
+};
+
+#define MAX_LBR_ENTRIES                32
+
+enum {
+       X86_PERF_KFREE_SHARED = 0,
+       X86_PERF_KFREE_EXCL   = 1,
+       X86_PERF_KFREE_MAX
+};
+
+struct cpu_hw_events {
+       /*
+        * Generic x86 PMC bits
+        */
+       struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
+       unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+       int                     enabled;
+
+       int                     n_events; /* the # of events in the below arrays */
+       int                     n_added;  /* the # last events in the below arrays;
+                                            they've never been enabled yet */
+       int                     n_txn;    /* the # last events in the below arrays;
+                                            added in the current transaction */
+       int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+       u64                     tags[X86_PMC_IDX_MAX];
+
+       struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+       struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
+
+       int                     n_excl; /* the number of exclusive events */
+
+       unsigned int            txn_flags;
+       int                     is_fake;
+
+       /*
+        * Intel DebugStore bits
+        */
+       struct debug_store      *ds;
+       u64                     pebs_enabled;
+
+       /*
+        * Intel LBR bits
+        */
+       int                             lbr_users;
+       void                            *lbr_context;
+       struct perf_branch_stack        lbr_stack;
+       struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+       struct er_account               *lbr_sel;
+       u64                             br_sel;
+
+       /*
+        * Intel host/guest exclude bits
+        */
+       u64                             intel_ctrl_guest_mask;
+       u64                             intel_ctrl_host_mask;
+       struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
+
+       /*
+        * Intel checkpoint mask
+        */
+       u64                             intel_cp_status;
+
+       /*
+        * manage shared (per-core, per-cpu) registers
+        * used on Intel NHM/WSM/SNB
+        */
+       struct intel_shared_regs        *shared_regs;
+       /*
+        * manage exclusive counter access between hyperthreads
+        */
+       struct event_constraint *constraint_list; /* in enable order */
+       struct intel_excl_cntrs         *excl_cntrs;
+       int excl_thread_id; /* 0 or 1 */
+
+       /*
+        * AMD specific bits
+        */
+       struct amd_nb                   *amd_nb;
+       /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+       u64                             perf_ctr_virt_mask;
+
+       void                            *kfree_on_online[X86_PERF_KFREE_MAX];
+};
+
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+       { .idxmsk64 = (n) },            \
+       .code = (c),                    \
+       .cmask = (m),                   \
+       .weight = (w),                  \
+       .overlap = (o),                 \
+       .flags = f,                     \
+}
+
+#define EVENT_CONSTRAINT(c, n, m)      \
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+
+#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
+       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
+                          0, PERF_X86_EVENT_EXCL)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may then fail to schedule the events. So we set the
+ * overlap flag for such constraints to give the scheduler a hint about
+ * which events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!), which
+ * will increase scheduling cycles for an over-committed system
+ * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and their counter masks must be kept to a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)      \
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
+
+/*
+ * Constraint on the Event code.
+ */
+#define INTEL_EVENT_CONSTRAINT(c, n)   \
+       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * Filter mask to validate fixed counter events.
+ * The following filters disqualify an event for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  - in_tx
+ *  - in_tx_checkpointed
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
+#define FIXED_EVENT_CONSTRAINT(c, n)   \
+       EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
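As an illustrative sketch only (not part of this patch), a per-model constraint table typically consumes these macros as shown below; the event codes and counter masks are assumptions chosen for illustration, and bit 32+n in the index mask selects fixed counter n:

/* Sketch: how the constraint macros are typically consumed */
static struct event_constraint example_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY on fixed counter 0 */
	INTEL_EVENT_CONSTRAINT(0x2e, 0x3),	/* event 0x2e limited to counters 0-1 */
	EVENT_CONSTRAINT_END
};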
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define INTEL_UEVENT_CONSTRAINT(c, n)  \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
+/* Constraint on specific umask bit only + event */
+#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)      \
+       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
+
+/* Like UEVENT_CONSTRAINT, but match flags too */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)    \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+#define INTEL_EXCLUEVT_CONSTRAINT(c, n)        \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+                          HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
+
+#define INTEL_PLD_CONSTRAINT(c, n)     \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                          HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
+
+#define INTEL_PST_CONSTRAINT(c, n)     \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n)    \
+       EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Check flags and event code, and set the HSW store flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+/* Check flags and event code, and set the HSW load flag */
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW store flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW load flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, \
+                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
+
+/* Check flags and event code/umask, and set the HSW N/A flag */
+#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
+       __EVENT_CONSTRAINT(code, n,                     \
+                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
+
+
+/*
+ * We define the end marker as having a weight of -1
+ * to enable blacklisting of events using a counter bitmask
+ * of zero and thus a weight of zero.
+ * The end marker has a weight that cannot possibly be
+ * obtained from counting the bits in the bitmask.
+ */
+#define EVENT_CONSTRAINT_END { .weight = -1 }
+
+/*
+ * Check for end marker with weight == -1
+ */
+#define for_each_event_constraint(e, c)        \
+       for ((e) = (c); (e)->weight != -1; (e)++)
+
+/*
+ * Extra registers for specific events.
+ *
+ * Some events need large masks and require external MSRs.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMUs of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared registers. The data
+ * structure to manage those registers is stored in cpu_hw_events.
+ */
+struct extra_reg {
+       unsigned int            event;
+       unsigned int            msr;
+       u64                     config_mask;
+       u64                     valid_mask;
+       int                     idx;  /* per_xxx->regs[] reg index */
+       bool                    extra_msr_access;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
+       .event = (e),                   \
+       .msr = (ms),                    \
+       .config_mask = (m),             \
+       .valid_mask = (vm),             \
+       .idx = EXTRA_REG_##i,           \
+       .extra_msr_access = true,       \
+       }
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
+                       ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
+
+#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
+       INTEL_UEVENT_EXTRA_REG(c, \
+                              MSR_PEBS_LD_LAT_THRESHOLD, \
+                              0xffff, \
+                              LDLAT)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
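A hedged sketch of how a model table of extra registers is built from these macros; the event codes, MSR, and valid mask below are illustrative assumptions rather than values taken from this patch:

/* Sketch: shared extra registers for an OFFCORE_RESPONSE-style event */
static struct extra_reg example_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffull, RSP_0),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	EVENT_EXTRA_END
};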
+
+union perf_capabilities {
+       struct {
+               u64     lbr_format:6;
+               u64     pebs_trap:1;
+               u64     pebs_arch_reg:1;
+               u64     pebs_format:4;
+               u64     smm_freeze:1;
+               /*
+                * PMU supports separate counter range for writing
+                * values > 32bit.
+                */
+               u64     full_width_write:1;
+       };
+       u64     capabilities;
+};
+
+struct x86_pmu_quirk {
+       struct x86_pmu_quirk *next;
+       void (*func)(void);
+};
+
+union x86_pmu_config {
+       struct {
+               u64 event:8,
+                   umask:8,
+                   usr:1,
+                   os:1,
+                   edge:1,
+                   pc:1,
+                   interrupt:1,
+                   __reserved1:1,
+                   en:1,
+                   inv:1,
+                   cmask:8,
+                   event2:4,
+                   __reserved2:4,
+                   go:1,
+                   ho:1;
+       } bits;
+       u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
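X86_CONFIG() packs the named bitfields above into a raw 64-bit config value; a minimal usage sketch, with event/umask numbers that are assumptions for illustration:

/* Sketch: build a raw config with an inverted cmask of 16 */
static void example_build_config(struct perf_event *event)
{
	event->hw.config = X86_CONFIG(.event = 0xc0, .umask = 0x01,
				      .inv = 1, .cmask = 16);
}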
+
+enum {
+       x86_lbr_exclusive_lbr,
+       x86_lbr_exclusive_bts,
+       x86_lbr_exclusive_pt,
+       x86_lbr_exclusive_max,
+};
+
+/*
+ * struct x86_pmu - generic x86 pmu
+ */
+struct x86_pmu {
+       /*
+        * Generic x86 PMC bits
+        */
+       const char      *name;
+       int             version;
+       int             (*handle_irq)(struct pt_regs *);
+       void            (*disable_all)(void);
+       void            (*enable_all)(int added);
+       void            (*enable)(struct perf_event *);
+       void            (*disable)(struct perf_event *);
+       int             (*hw_config)(struct perf_event *event);
+       int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+       unsigned        eventsel;
+       unsigned        perfctr;
+       int             (*addr_offset)(int index, bool eventsel);
+       int             (*rdpmc_index)(int index);
+       u64             (*event_map)(int);
+       int             max_events;
+       int             num_counters;
+       int             num_counters_fixed;
+       int             cntval_bits;
+       u64             cntval_mask;
+       union {
+                       unsigned long events_maskl;
+                       unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+       };
+       int             events_mask_len;
+       int             apic;
+       u64             max_period;
+       struct event_constraint *
+                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
+                                                int idx,
+                                                struct perf_event *event);
+
+       void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
+                                                struct perf_event *event);
+
+       void            (*start_scheduling)(struct cpu_hw_events *cpuc);
+
+       void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
+       void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
+
+       struct event_constraint *event_constraints;
+       struct x86_pmu_quirk *quirks;
+       int             perfctr_second_write;
+       bool            late_ack;
+       unsigned        (*limit_period)(struct perf_event *event, unsigned l);
+
+       /*
+        * sysfs attrs
+        */
+       int             attr_rdpmc_broken;
+       int             attr_rdpmc;
+       struct attribute **format_attrs;
+       struct attribute **event_attrs;
+
+       ssize_t         (*events_sysfs_show)(char *page, u64 config);
+       struct attribute **cpu_events;
+
+       /*
+        * CPU Hotplug hooks
+        */
+       int             (*cpu_prepare)(int cpu);
+       void            (*cpu_starting)(int cpu);
+       void            (*cpu_dying)(int cpu);
+       void            (*cpu_dead)(int cpu);
+
+       void            (*check_microcode)(void);
+       void            (*sched_task)(struct perf_event_context *ctx,
+                                     bool sched_in);
+
+       /*
+        * Intel Arch Perfmon v2+
+        */
+       u64                     intel_ctrl;
+       union perf_capabilities intel_cap;
+
+       /*
+        * Intel DebugStore bits
+        */
+       unsigned int    bts             :1,
+                       bts_active      :1,
+                       pebs            :1,
+                       pebs_active     :1,
+                       pebs_broken     :1,
+                       pebs_prec_dist  :1;
+       int             pebs_record_size;
+       int             pebs_buffer_size;
+       void            (*drain_pebs)(struct pt_regs *regs);
+       struct event_constraint *pebs_constraints;
+       void            (*pebs_aliases)(struct perf_event *event);
+       int             max_pebs_events;
+       unsigned long   free_running_flags;
+
+       /*
+        * Intel LBR
+        */
+       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+       int             lbr_nr;                    /* hardware stack size */
+       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
+       const int       *lbr_sel_map;              /* lbr_select mappings */
+       bool            lbr_double_abort;          /* duplicated lbr aborts */
+
+       /*
+        * Intel PT/LBR/BTS are exclusive
+        */
+       atomic_t        lbr_exclusive[x86_lbr_exclusive_max];
+
+       /*
+        * Extra registers for events
+        */
+       struct extra_reg *extra_regs;
+       unsigned int flags;
+
+       /*
+        * Intel host/guest support (KVM)
+        */
+       struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+};
+
+struct x86_perf_task_context {
+       u64 lbr_from[MAX_LBR_ENTRIES];
+       u64 lbr_to[MAX_LBR_ENTRIES];
+       u64 lbr_info[MAX_LBR_ENTRIES];
+       int tos;
+       int lbr_callstack_users;
+       int lbr_stack_state;
+};
+
+#define x86_add_quirk(func_)                                           \
+do {                                                                   \
+       static struct x86_pmu_quirk __quirk __initdata = {              \
+               .func = func_,                                          \
+       };                                                              \
+       __quirk.next = x86_pmu.quirks;                                  \
+       x86_pmu.quirks = &__quirk;                                      \
+} while (0)
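Each x86_add_quirk() call prepends a node to the x86_pmu.quirks list, which the init code walks once; a usage sketch with a hypothetical quirk function:

/* Sketch: registering a boot-time PMU quirk (hypothetical names) */
static __init void example_pmu_quirk(void)
{
	/* model-specific fixup of x86_pmu fields would go here */
}

static __init int example_pmu_setup(void)
{
	x86_add_quirk(example_pmu_quirk);
	return 0;
}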
+
+/*
+ * x86_pmu flags
+ */
+#define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
+#define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
+#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
+#define PMU_FL_EXCL_ENABLED    0x8 /* exclusive counter active */
+
+#define EVENT_VAR(_id)  event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id)                                         \
+static struct perf_pmu_events_attr EVENT_VAR(_id) = {                  \
+       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+       .id             = PERF_COUNT_HW_##_id,                          \
+       .event_str      = NULL,                                         \
+};
+
+#define EVENT_ATTR_STR(_name, v, str)                                  \
+static struct perf_pmu_events_attr event_attr_##v = {                  \
+       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
+       .id             = 0,                                            \
+       .event_str      = str,                                          \
+};
+
+extern struct x86_pmu x86_pmu __read_mostly;
+
+static inline bool x86_pmu_has_lbr_callstack(void)
+{
+       return  x86_pmu.lbr_sel_map &&
+               x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+}
+
+DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+int x86_perf_event_set_period(struct perf_event *event);
+
+/*
+ * Generalized hw caching related hw_event table, filled
+ * in on a per-model basis. A value of 0 means
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
+ * ID.
+ */
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+extern u64 __read_mostly hw_cache_event_ids
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+extern u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+u64 x86_perf_event_update(struct perf_event *event);
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+       return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+                                  x86_pmu.addr_offset(index, true) : index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+       return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+                                 x86_pmu.addr_offset(index, false) : index);
+}
+
+static inline int x86_pmu_rdpmc_index(int index)
+{
+       return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
+}
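When a PMU leaves addr_offset and rdpmc_index NULL, these helpers reduce to plain base-plus-index arithmetic over contiguous MSRs; PMUs with interleaved control/counter pairs (AMD Fam15h, for instance) install addr_offset to remap the index. A hedged sketch of the resulting access pattern, with an illustrative event value:

/* Sketch: program and read back counter 'idx' on a contiguous-MSR PMU */
static void example_program_counter(int idx)
{
	u64 config = X86_CONFIG(.event = 0x3c, .umask = 0x00);	/* illustrative */
	u64 count;

	wrmsrl(x86_pmu_config_addr(idx), config | ARCH_PERFMON_EVENTSEL_ENABLE);
	rdmsrl(x86_pmu_event_addr(idx), count);
	pr_debug("counter %d: %llu\n", idx, count);
}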
+
+int x86_add_exclusive(unsigned int what);
+
+void x86_del_exclusive(unsigned int what);
+
+int x86_reserve_hardware(void);
+
+void x86_release_hardware(void);
+
+void hw_perf_lbr_event_destroy(struct perf_event *event);
+
+int x86_setup_perfctr(struct perf_event *event);
+
+int x86_pmu_hw_config(struct perf_event *event);
+
+void x86_pmu_disable_all(void);
+
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+                                         u64 enable_mask)
+{
+       u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
+       if (hwc->extra_reg.reg)
+               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+       wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+}
+
+void x86_pmu_enable_all(int added);
+
+int perf_assign_events(struct event_constraint **constraints, int n,
+                       int wmin, int wmax, int gpmax, int *assign);
+int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
+
+void x86_pmu_stop(struct perf_event *event, int flags);
+
+static inline void x86_pmu_disable_event(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       wrmsrl(hwc->config_base, hwc->config);
+}
+
+void x86_pmu_enable_event(struct perf_event *event);
+
+int x86_pmu_handle_irq(struct pt_regs *regs);
+
+extern struct event_constraint emptyconstraint;
+
+extern struct event_constraint unconstrained;
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+       return ip > PAGE_OFFSET;
+#else
+       return (long)ip < 0;
+#endif
+}
+
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically, segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address; nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+       regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+       if (regs->flags & X86_VM_MASK)
+               regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+       regs->ip = ip;
+}
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ssize_t intel_event_sysfs_show(char *page, u64 config);
+
+struct attribute **merge_attr(struct attribute **a, struct attribute **b);
+
+#ifdef CONFIG_CPU_SUP_AMD
+
+int amd_pmu_init(void);
+
+#else /* CONFIG_CPU_SUP_AMD */
+
+static inline int amd_pmu_init(void)
+{
+       return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+       if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+           !event->attr.freq && event->hw.sample_period == 1)
+               return true;
+
+       return false;
+}
+
+int intel_pmu_save_and_restart(struct perf_event *event);
+
+struct event_constraint *
+x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event);
+
+struct intel_shared_regs *allocate_shared_regs(int cpu);
+
+int intel_pmu_init(void);
+
+void init_debug_store_on_cpu(int cpu);
+
+void fini_debug_store_on_cpu(int cpu);
+
+void release_ds_buffers(void);
+
+void reserve_ds_buffers(void);
+
+extern struct event_constraint bts_constraint;
+
+void intel_pmu_enable_bts(u64 config);
+
+void intel_pmu_disable_bts(void);
+
+int intel_pmu_drain_bts_buffer(void);
+
+extern struct event_constraint intel_core2_pebs_event_constraints[];
+
+extern struct event_constraint intel_atom_pebs_event_constraints[];
+
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
+extern struct event_constraint intel_nehalem_pebs_event_constraints[];
+
+extern struct event_constraint intel_westmere_pebs_event_constraints[];
+
+extern struct event_constraint intel_snb_pebs_event_constraints[];
+
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
+extern struct event_constraint intel_bdw_pebs_event_constraints[];
+
+extern struct event_constraint intel_skl_pebs_event_constraints[];
+
+struct event_constraint *intel_pebs_constraints(struct perf_event *event);
+
+void intel_pmu_pebs_enable(struct perf_event *event);
+
+void intel_pmu_pebs_disable(struct perf_event *event);
+
+void intel_pmu_pebs_enable_all(void);
+
+void intel_pmu_pebs_disable_all(void);
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_ds_init(void);
+
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+void intel_pmu_lbr_reset(void);
+
+void intel_pmu_lbr_enable(struct perf_event *event);
+
+void intel_pmu_lbr_disable(struct perf_event *event);
+
+void intel_pmu_lbr_enable_all(bool pmi);
+
+void intel_pmu_lbr_disable_all(void);
+
+void intel_pmu_lbr_read(void);
+
+void intel_pmu_lbr_init_core(void);
+
+void intel_pmu_lbr_init_nhm(void);
+
+void intel_pmu_lbr_init_atom(void);
+
+void intel_pmu_lbr_init_snb(void);
+
+void intel_pmu_lbr_init_hsw(void);
+
+void intel_pmu_lbr_init_skl(void);
+
+void intel_pmu_lbr_init_knl(void);
+
+void intel_pmu_pebs_data_source_nhm(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
+void intel_pt_interrupt(void);
+
+int intel_bts_interrupt(void);
+
+void intel_bts_enable_local(void);
+
+void intel_bts_disable_local(void);
+
+int p4_pmu_init(void);
+
+int p6_pmu_init(void);
+
+int knc_pmu_init(void);
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+                         char *page);
+
+static inline int is_ht_workaround_enabled(void)
+{
+       return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static inline void reserve_ds_buffers(void)
+{
+}
+
+static inline void release_ds_buffers(void)
+{
+}
+
+static inline int intel_pmu_init(void)
+{
+       return 0;
+}
+
+static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       return NULL;
+}
+
+static inline int is_ht_workaround_enabled(void)
+{
+       return 0;
+}
+#endif /* CONFIG_CPU_SUP_INTEL */
index a584e1c50918406a0398cf4a013432e47abf143e..bfb28caf97b1be1f2d6aa8893bd905a39030e310 100644 (file)
@@ -6,18 +6,17 @@
 
 /*
  * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
+ * And yes, this might be required on UP too when we're talking
  * to devices.
  */
 
 #ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \
+                                     X86_FEATURE_XMM2) ::: "memory", "cc")
+#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \
+                                      X86_FEATURE_XMM2) ::: "memory", "cc")
+#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \
+                                      X86_FEATURE_XMM2) ::: "memory", "cc")
 #else
 #define mb()   asm volatile("mfence":::"memory")
 #define rmb()  asm volatile("lfence":::"memory")
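For context, a hedged sketch of the producer-side pattern these barriers serve; the structure and fields are hypothetical and only illustrate why wmb() is needed even when a UP kernel is talking to devices:

/* Sketch: order a payload store before publishing a ready flag */
struct example_desc {
	u64 data;
	u32 ready;
};

static void example_publish(struct example_desc *d, u64 val)
{
	d->data = val;
	wmb();		/* payload must be visible before the flag */
	d->ready = 1;
}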
index e63aa38e85fb23375ecefa40efe8cd9d76da6c43..61518cf79437679788bd41a1ee1c942aea29a8cb 100644 (file)
@@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size);
 
 #define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
 
-#ifdef CONFIG_DEBUG_RODATA
-void mark_rodata_ro(void);
 extern const int rodata_test_data;
 extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
 
 #ifdef CONFIG_DEBUG_RODATA_TEST
 int rodata_test(void);
index 1514753fd43553e079696712b48a8d08b6966e98..15340e36ddcb3364e16eb63cd61c61a42676d756 100644 (file)
@@ -256,7 +256,7 @@ extern int force_personality32;
    instruction set this CPU supports.  This could be done in user space,
    but it's not easy, and we've already done it here.  */
 
-#define ELF_HWCAP              (boot_cpu_data.x86_capability[0])
+#define ELF_HWCAP              (boot_cpu_data.x86_capability[CPUID_1_EDX])
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
index af30fdeb140da75691e91c52446f7f1bb7fe2e93..f23cd8c80b1c818057053e831ec704ab4006fba4 100644 (file)
 
 /* Supported features which support lazy state saving */
 #define XFEATURE_MASK_LAZY     (XFEATURE_MASK_FP | \
-                                XFEATURE_MASK_SSE)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER    (XFEATURE_MASK_BNDREGS | \
-                                XFEATURE_MASK_BNDCSR | \
+                                XFEATURE_MASK_SSE | \
                                 XFEATURE_MASK_YMM | \
                                 XFEATURE_MASK_OPMASK | \
                                 XFEATURE_MASK_ZMM_Hi256 | \
                                 XFEATURE_MASK_Hi16_ZMM)
 
+/* Supported features which require eager state saving */
+#define XFEATURE_MASK_EAGER    (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
+
 /* All currently supported features */
 #define XCNTXT_MASK    (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
 
index c1adf33fdd0d6f70f055b9a056bc7787bda7635e..bc62e7cbf1b1f883fc9acb0a145305384a3bf062 100644 (file)
@@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void)
 }
 #endif /* CONFIG_KVM_GUEST */
 
-#ifdef CONFIG_DEBUG_RODATA
 #define KVM_HYPERCALL \
         ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
-#else
-/* On AMD processors, vmcall will generate a trap that we will
- * then rewrite to the appropriate instruction.
- */
-#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
-#endif
 
 /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the
index 7bcb861a04e5cf5b32c2c26919756fa29353cb07..5a2ed3ed2f261893d5de08022ea3259198c8c0fe 100644 (file)
@@ -165,6 +165,7 @@ struct x86_pmu_capability {
 #define GLOBAL_STATUS_ASIF                             BIT_ULL(60)
 #define GLOBAL_STATUS_COUNTERS_FROZEN                  BIT_ULL(59)
 #define GLOBAL_STATUS_LBRS_FROZEN                      BIT_ULL(58)
+#define GLOBAL_STATUS_TRACE_TOPAPMI                    BIT_ULL(55)
 
 /*
  * IBS cpuid feature detection
index 20c11d1aa4ccce11b0709c3fe56092a5505151cf..813384ef811a03273678f693ca60be23d8bfd216 100644 (file)
@@ -129,6 +129,8 @@ struct cpuinfo_x86 {
        u16                     booted_cores;
        /* Physical processor id: */
        u16                     phys_proc_id;
+       /* Logical processor id: */
+       u16                     logical_proc_id;
        /* Core id: */
        u16                     cpu_core_id;
        /* Compute unit id */
index 0a5242428659045cfb439d3045593cc1c63aad96..13b6cdd0af57049468e47ef884e6eccba49dca0e 100644 (file)
@@ -7,7 +7,7 @@
 extern char __brk_base[], __brk_limit[];
 extern struct exception_table_entry __stop___ex_table[];
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 extern char __end_rodata_hpage_align[];
 #endif
 
index 0fb46482dfde160b9dcfad6ef57841e07c3830e2..7f991bd5031b24947e0773265023ab70b934a7b8 100644 (file)
@@ -119,12 +119,23 @@ static inline void setup_node_to_cpumask_map(void) { }
 
 extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#define topology_logical_package_id(cpu)       (cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).phys_proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).cpu_core_id)
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_core_cpumask(cpu)             (per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)          (per_cpu(cpu_sibling_map, cpu))
+
+extern unsigned int __max_logical_packages;
+#define topology_max_packages()                        (__max_logical_packages)
+int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+extern int topology_phys_to_logical_pkg(unsigned int pkg);
+#else
+#define topology_max_packages()                        (1)
+static inline int
+topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 #endif
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
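A hedged sketch of how a consumer might use the new logical package id; the function and array size are illustrative assumptions, not part of this patch:

/* Sketch: count online CPUs per logical package */
static void example_count_package_cpus(void)
{
	unsigned int counts[8] = { 0 }, pkg;
	int cpu;

	for_each_online_cpu(cpu) {
		pkg = topology_logical_package_id(cpu);
		if (pkg < ARRAY_SIZE(counts))
			counts[pkg]++;
	}

	for (pkg = 0; pkg < ARRAY_SIZE(counts); pkg++)
		if (counts[pkg])
			pr_info("package %u: %u online CPUs\n", pkg, counts[pkg]);
}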
index 8a5cddac7d444084c223e95948944b5099d15003..531b9611c51d5d4b42f721f45609a5f4ecd3cce2 100644 (file)
@@ -2077,6 +2077,20 @@ int generic_processor_info(int apicid, int version)
        } else
                cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+       /*
+        * This can happen on physical hotplug. The sanity check at boot time
+        * is done from native_smp_prepare_cpus() after num_possible_cpus() is
+        * established.
+        */
+       if (topology_update_package_map(apicid, cpu) < 0) {
+               int thiscpu = max + disabled_cpus;
+
+               pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
+                          thiscpu, apicid);
+               disabled_cpus++;
+               return -ENOSPC;
+       }
+
        /*
         * Validate version
         */
index 58031303e30488c540d609f95d0693918c09fa64..7a60424d63fa4853c37941725bfaa73372c81175 100644 (file)
@@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)               += centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)     += transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)           += umc.o
 
-obj-$(CONFIG_PERF_EVENTS)              += perf_event.o
-
-ifdef CONFIG_PERF_EVENTS
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_amd.o perf_event_amd_uncore.o
-ifdef CONFIG_AMD_IOMMU
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_amd_iommu.o
-endif
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_p6.o perf_event_knc.o perf_event_p4.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_rapl.o perf_event_intel_cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_pt.o perf_event_intel_bts.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_intel_cstate.o
-
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
-                                          perf_event_intel_uncore_snb.o \
-                                          perf_event_intel_uncore_snbep.o \
-                                          perf_event_intel_uncore_nhmex.o
-obj-$(CONFIG_CPU_SUP_INTEL)            += perf_event_msr.o
-obj-$(CONFIG_CPU_SUP_AMD)              += perf_event_msr.o
-endif
-
-
 obj-$(CONFIG_X86_MCE)                  += mcheck/
 obj-$(CONFIG_MTRR)                     += mtrr/
 obj-$(CONFIG_MICROCODE)                        += microcode/
 
-obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o perf_event_amd_ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC)           += perfctr-watchdog.o
 
 obj-$(CONFIG_HYPERVISOR_GUEST)         += vmware.o hypervisor.o mshyperv.o
 
index a07956a08936e8ea56c1a73075d700d281895577..97c59fd60702f4ceacad7c4d46a3c8d5de7dacbf 100644 (file)
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                void (*f_vide)(void);
                u64 d, d2;
 
-               printk(KERN_INFO "AMD K6 stepping B detected - ");
+               pr_info("AMD K6 stepping B detected - ");
 
                /*
                 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                d = d2-d;
 
                if (d > 20*K6_BUG_LOOP)
-                       printk(KERN_CONT
-                               "system stability may be impaired when more than 32 MB are used.\n");
+                       pr_cont("system stability may be impaired when more than 32 MB are used.\n");
                else
-                       printk(KERN_CONT "probably OK (after B9730xxxx).\n");
+                       pr_cont("probably OK (after B9730xxxx).\n");
        }
 
        /* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                        wbinvd();
                        wrmsr(MSR_K6_WHCR, l, h);
                        local_irq_restore(flags);
-                       printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
+                       pr_info("Enabling old style K6 write allocation for %d Mb\n",
                                mbytes);
                }
                return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                        wbinvd();
                        wrmsr(MSR_K6_WHCR, l, h);
                        local_irq_restore(flags);
-                       printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
+                       pr_info("Enabling new style K6 write allocation for %d Mb\n",
                                mbytes);
                }
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         */
        if (c->x86_model >= 6 && c->x86_model <= 10) {
                if (!cpu_has(c, X86_FEATURE_XMM)) {
-                       printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+                       pr_info("Enabling disabled K7/SSE Support.\n");
                        msr_clear_bit(MSR_K7_HWCR, 15);
                        set_cpu_cap(c, X86_FEATURE_XMM);
                }
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
        if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
                rdmsr(MSR_K7_CLK_CTL, l, h);
                if ((l & 0xfff00000) != 0x20000000) {
-                       printk(KERN_INFO
-                           "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-                                       l, ((l & 0x000fffff)|0x20000000));
+                       pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+                               l, ((l & 0x000fffff)|0x20000000));
                        wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
                }
        }
@@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
                if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
                        unsigned long pfn = tseg >> PAGE_SHIFT;
 
-                       printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+                       pr_debug("tseg: %010llx\n", tseg);
                        if (pfn_range_is_mapped(pfn, pfn + 1))
                                set_memory_4k((unsigned long)__va(tseg), 1);
                }
@@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
                        rdmsrl(MSR_K7_HWCR, val);
                        if (!(val & BIT(24)))
-                               printk(KERN_WARNING FW_BUG "TSC doesn't count "
-                                       "with P0 frequency!\n");
+                               pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n");
                }
        }
 
index 04f0fe5af83ec34bb4fd6ec09fd3cc8db1c7ee07..a972ac4c7e7df05238e78b08e093cded47640fcd 100644 (file)
@@ -15,7 +15,7 @@ void __init check_bugs(void)
 {
        identify_boot_cpu();
 #if !defined(CONFIG_SMP)
-       printk(KERN_INFO "CPU: ");
+       pr_info("CPU: ");
        print_cpu_info(&boot_cpu_data);
 #endif
        alternative_instructions();
index ae20be6e483c77703413cf36e9db065134da01f3..ce197bb7c1294f0f1138c78d1514c78e0d92b910 100644 (file)
@@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c)
                        rdmsr(MSR_VIA_FCR, lo, hi);
                        lo |= ACE_FCR;          /* enable ACE unit */
                        wrmsr(MSR_VIA_FCR, lo, hi);
-                       printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
+                       pr_info("CPU: Enabled ACE h/w crypto\n");
                }
 
                /* enable RNG unit, if present and disabled */
@@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c)
                        rdmsr(MSR_VIA_RNG, lo, hi);
                        lo |= RNG_ENABLE;       /* enable RNG unit */
                        wrmsr(MSR_VIA_RNG, lo, hi);
-                       printk(KERN_INFO "CPU: Enabled h/w RNG\n");
+                       pr_info("CPU: Enabled h/w RNG\n");
                }
 
                /* store Centaur Extended Feature Flags as
@@ -130,7 +130,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
                        name = "C6";
                        fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
                        fcr_clr = DPDC;
-                       printk(KERN_NOTICE "Disabling bugged TSC.\n");
+                       pr_notice("Disabling bugged TSC.\n");
                        clear_cpu_cap(c, X86_FEATURE_TSC);
                        break;
                case 8:
@@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c)
                newlo = (lo|fcr_set) & (~fcr_clr);
 
                if (newlo != lo) {
-                       printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+                       pr_info("Centaur FCR was 0x%X now 0x%X\n",
                                lo, newlo);
                        wrmsr(MSR_IDT_FCR1, newlo, hi);
                } else {
-                       printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+                       pr_info("Centaur FCR is 0x%X\n", lo);
                }
                /* Emulate MTRRs using Centaur's MCR. */
                set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
index 37830de8f60a8f0d8f8da27d409da72ce1f13551..81cf716f6f97f2438b386c7e3eb3ca073e16d0c8 100644 (file)
@@ -228,7 +228,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
        lo |= 0x200000;
        wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
 
-       printk(KERN_NOTICE "CPU serial number disabled.\n");
+       pr_notice("CPU serial number disabled.\n");
        clear_cpu_cap(c, X86_FEATURE_PN);
 
        /* Disabling the serial number may affect the cpuid level */
@@ -329,9 +329,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
                if (!warn)
                        continue;
 
-               printk(KERN_WARNING
-                      "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
-                               x86_cap_flag(df->feature), df->level);
+               pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+                       x86_cap_flag(df->feature), df->level);
        }
 }
 
@@ -510,7 +509,7 @@ void detect_ht(struct cpuinfo_x86 *c)
        smp_num_siblings = (ebx & 0xff0000) >> 16;
 
        if (smp_num_siblings == 1) {
-               printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
+               pr_info_once("CPU0: Hyper-Threading is disabled\n");
                goto out;
        }
 
@@ -531,10 +530,10 @@ void detect_ht(struct cpuinfo_x86 *c)
 
 out:
        if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
-               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-                      c->phys_proc_id);
-               printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-                      c->cpu_core_id);
+               pr_info("CPU: Physical Processor ID: %d\n",
+                       c->phys_proc_id);
+               pr_info("CPU: Processor Core ID: %d\n",
+                       c->cpu_core_id);
                printed = 1;
        }
 #endif
@@ -559,9 +558,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c)
                }
        }
 
-       printk_once(KERN_ERR
-                       "CPU: vendor_id '%s' unknown, using generic init.\n" \
-                       "CPU: Your system may be unstable.\n", v);
+       pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \
+                   "CPU: Your system may be unstable.\n", v);
 
        c->x86_vendor = X86_VENDOR_UNKNOWN;
        this_cpu = &default_cpu;
@@ -760,7 +758,7 @@ void __init early_cpu_init(void)
        int count = 0;
 
 #ifdef CONFIG_PROCESSOR_SELECT
-       printk(KERN_INFO "KERNEL supported cpus:\n");
+       pr_info("KERNEL supported cpus:\n");
 #endif
 
        for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
@@ -778,7 +776,7 @@ void __init early_cpu_init(void)
                        for (j = 0; j < 2; j++) {
                                if (!cpudev->c_ident[j])
                                        continue;
-                               printk(KERN_INFO "  %s %s\n", cpudev->c_vendor,
+                               pr_info("  %s %s\n", cpudev->c_vendor,
                                        cpudev->c_ident[j]);
                        }
                }
@@ -977,6 +975,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
 #endif
+       /* The boot/hotplug time assignment got cleared, restore it */
+       c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
 }
 
 /*
@@ -1061,7 +1061,7 @@ static void __print_cpu_msr(void)
                for (index = index_min; index < index_max; index++) {
                        if (rdmsrl_safe(index, &val))
                                continue;
-                       printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
+                       pr_info(" MSR%08x: %016llx\n", index, val);
                }
        }
 }
@@ -1100,19 +1100,19 @@ void print_cpu_info(struct cpuinfo_x86 *c)
        }
 
        if (vendor && !strstr(c->x86_model_id, vendor))
-               printk(KERN_CONT "%s ", vendor);
+               pr_cont("%s ", vendor);
 
        if (c->x86_model_id[0])
-               printk(KERN_CONT "%s", c->x86_model_id);
+               pr_cont("%s", c->x86_model_id);
        else
-               printk(KERN_CONT "%d86", c->x86);
+               pr_cont("%d86", c->x86);
 
-       printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
+       pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
        if (c->x86_mask || c->cpuid_level >= 0)
-               printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask);
+               pr_cont(", stepping: 0x%x)\n", c->x86_mask);
        else
-               printk(KERN_CONT ")\n");
+               pr_cont(")\n");
 
        print_cpu_msr(c);
 }
@@ -1438,7 +1438,7 @@ void cpu_init(void)
 
        show_ucode_info_early();
 
-       printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+       pr_info("Initializing CPU#%d\n", cpu);
 
        if (cpu_feature_enabled(X86_FEATURE_VME) ||
            cpu_has_tsc ||
index aaf152e79637384781d264afbff76a58da9489cc..187bb583d0dfbccd6bf3e8c58a4973a0bff9ddd7 100644 (file)
@@ -103,7 +103,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c)
                local_irq_restore(flags);
 
                if (ccr5 & 2) { /* possible wrong calibration done */
-                       printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n");
+                       pr_info("Recalibrating delay loop with SLOP bit reset\n");
                        calibrate_delay();
                        c->loops_per_jiffy = loops_per_jiffy;
                }
@@ -115,7 +115,7 @@ static void set_cx86_reorder(void)
 {
        u8 ccr3;
 
-       printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+       pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n");
        ccr3 = getCx86(CX86_CCR3);
        setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
@@ -128,7 +128,7 @@ static void set_cx86_reorder(void)
 
 static void set_cx86_memwb(void)
 {
-       printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+       pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
        /* CCR2 bit 2: unlock NW bit */
        setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
@@ -268,7 +268,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
                 *  VSA1 we work around however.
                 */
 
-               printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
+               pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n");
                isa_dma_bridge_buggy = 2;
 
                /* We do this before the PCI layer is running. However we
@@ -426,7 +426,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c)
                if (dir0 == 5 || dir0 == 3) {
                        unsigned char ccr3;
                        unsigned long flags;
-                       printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
+                       pr_info("Enabling CPUID on Cyrix processor.\n");
                        local_irq_save(flags);
                        ccr3 = getCx86(CX86_CCR3);
                        /* enable MAPEN  */
index d820d8eae96be0b3daa0ec01d9f4a22bf1ad930e..73d391ae452f82a7bfeca4be2276eed628dcd452 100644 (file)
@@ -56,7 +56,7 @@ detect_hypervisor_vendor(void)
        }
 
        if (max_pri)
-               printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
+               pr_info("Hypervisor detected: %s\n", x86_hyper->name);
 }
 
 void init_hypervisor(struct cpuinfo_x86 *c)
index 565648bc1a0aef6c3cf60da92ec9fb60a2408c90..38766c2b5b003984bcc63e459e7cfb820c907473 100644 (file)
@@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         */
        if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
            c->microcode < 0x20e) {
-               printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+               pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
                clear_cpu_cap(c, X86_FEATURE_PSE);
        }
 
@@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
        if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
                rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
                if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) {
-                       printk(KERN_INFO "Disabled fast string operations\n");
+                       pr_info("Disabled fast string operations\n");
                        setup_clear_cpu_cap(X86_FEATURE_REP_GOOD);
                        setup_clear_cpu_cap(X86_FEATURE_ERMS);
                }
@@ -160,6 +160,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
                pr_info("Disabling PGE capability bit\n");
                setup_clear_cpu_cap(X86_FEATURE_PGE);
        }
+
+       if (c->cpuid_level >= 0x00000001) {
+               u32 eax, ebx, ecx, edx;
+
+               cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+               /*
+                * If HTT (EDX[28]) is set EBX[16:23] contain the number of
+                * apicids which are reserved per package. Store the resulting
+                * shift value for the package management code.
+                */
+               if (edx & (1U << 28))
+                       c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff);
+       }
 }
 
 #ifdef CONFIG_X86_32
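A worked illustration of the shift computed above, using an assumed CPUID(1) return value:

/*
 * Sketch: assume CPUID(1) returns ebx = 0x00100800 with EDX[28] set.
 *   (ebx >> 16) & 0xff   == 16 apicids reserved per package
 *   get_count_order(16)  == 4  -> x86_coreid_bits = 4
 * A CPU with apicid 0x23 therefore sits in physical package 0x23 >> 4 == 2.
 */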
@@ -176,7 +189,7 @@ int ppro_with_ram_bug(void)
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
            boot_cpu_data.x86_mask < 8) {
-               printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n");
+               pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
                return 1;
        }
        return 0;
@@ -225,7 +238,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 
                set_cpu_bug(c, X86_BUG_F00F);
                if (!f00f_workaround_enabled) {
-                       printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
+                       pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n");
                        f00f_workaround_enabled = 1;
                }
        }
@@ -244,7 +257,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * Forcefully enable PAE if kernel parameter "forcepae" is present.
         */
        if (forcepae) {
-               printk(KERN_WARNING "PAE forced!\n");
+               pr_warn("PAE forced!\n");
                set_cpu_cap(c, X86_FEATURE_PAE);
                add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
        }
index 0b6c52388cf484f809a7f328f475424dd262417c..6ed779efff2662bbeaa5f063a7d1c8ff2fd16e2f 100644 (file)
@@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
        err = amd_set_l3_disable_slot(nb, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
-                       pr_warning("L3 slot %d in use/index already disabled!\n",
+                       pr_warn("L3 slot %d in use/index already disabled!\n",
                                   slot);
                return err;
        }
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
deleted file mode 100644 (file)
index 336878a..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#ifndef __INTEL_PT_H__
-#define __INTEL_PT_H__
-
-/*
- * Single-entry ToPA: when this close to region boundary, switch
- * buffers to avoid losing data.
- */
-#define TOPA_PMI_MARGIN 512
-
-#define TOPA_SHIFT 12
-
-static inline unsigned int sizes(unsigned int tsz)
-{
-       return 1 << (tsz + TOPA_SHIFT);
-};
-
-struct topa_entry {
-       u64     end     : 1;
-       u64     rsvd0   : 1;
-       u64     intr    : 1;
-       u64     rsvd1   : 1;
-       u64     stop    : 1;
-       u64     rsvd2   : 1;
-       u64     size    : 4;
-       u64     rsvd3   : 2;
-       u64     base    : 36;
-       u64     rsvd4   : 16;
-};
-
-#define PT_CPUID_LEAVES                2
-#define PT_CPUID_REGS_NUM      4 /* number of regsters (eax, ebx, ecx, edx) */
-
-enum pt_capabilities {
-       PT_CAP_max_subleaf = 0,
-       PT_CAP_cr3_filtering,
-       PT_CAP_psb_cyc,
-       PT_CAP_mtc,
-       PT_CAP_topa_output,
-       PT_CAP_topa_multiple_entries,
-       PT_CAP_single_range_output,
-       PT_CAP_payloads_lip,
-       PT_CAP_mtc_periods,
-       PT_CAP_cycle_thresholds,
-       PT_CAP_psb_periods,
-};
-
-struct pt_pmu {
-       struct pmu              pmu;
-       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
-};
-
-/**
- * struct pt_buffer - buffer configuration; one buffer per task_struct or
- *             cpu, depending on perf event configuration
- * @cpu:       cpu for per-cpu allocation
- * @tables:    list of ToPA tables in this buffer
- * @first:     shorthand for first topa table
- * @last:      shorthand for last topa table
- * @cur:       current topa table
- * @nr_pages:  buffer size in pages
- * @cur_idx:   current output region's index within @cur table
- * @output_off:        offset within the current output region
- * @data_size: running total of the amount of data in this buffer
- * @lost:      if data was lost/truncated
- * @head:      logical write offset inside the buffer
- * @snapshot:  if this is for a snapshot/overwrite counter
- * @stop_pos:  STOP topa entry in the buffer
- * @intr_pos:  INT topa entry in the buffer
- * @data_pages:        array of pages from perf
- * @topa_index:        table of topa entries indexed by page offset
- */
-struct pt_buffer {
-       int                     cpu;
-       struct list_head        tables;
-       struct topa             *first, *last, *cur;
-       unsigned int            cur_idx;
-       size_t                  output_off;
-       unsigned long           nr_pages;
-       local_t                 data_size;
-       local_t                 lost;
-       local64_t               head;
-       bool                    snapshot;
-       unsigned long           stop_pos, intr_pos;
-       void                    **data_pages;
-       struct topa_entry       *topa_index[0];
-};
-
-/**
- * struct pt - per-cpu pt context
- * @handle:    perf output handle
- * @handle_nmi:        do handle PT PMI on this cpu, there's an active event
- */
-struct pt {
-       struct perf_output_handle handle;
-       int                     handle_nmi;
-};
-
-#endif /* __INTEL_PT_H__ */
index 4cfba4371a71f28c4615610475e3eec9e8d9790c..517619ea6498b41839f87f28bce4a76b1e43c7ab 100644 (file)
@@ -115,7 +115,7 @@ static int raise_local(void)
        int cpu = m->extcpu;
 
        if (m->inject_flags & MCJ_EXCEPTION) {
-               printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+               pr_info("Triggering MCE exception on CPU %d\n", cpu);
                switch (context) {
                case MCJ_CTX_IRQ:
                        /*
@@ -128,15 +128,15 @@ static int raise_local(void)
                        raise_exception(m, NULL);
                        break;
                default:
-                       printk(KERN_INFO "Invalid MCE context\n");
+                       pr_info("Invalid MCE context\n");
                        ret = -EINVAL;
                }
-               printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+               pr_info("MCE exception done on CPU %d\n", cpu);
        } else if (m->status) {
-               printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+               pr_info("Starting machine check poll CPU %d\n", cpu);
                raise_poll(m);
                mce_notify_irq();
-               printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+               pr_info("Machine check poll done on CPU %d\n", cpu);
        } else
                m->finished = 0;
 
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
                start = jiffies;
                while (!cpumask_empty(mce_inject_cpumask)) {
                        if (!time_before(jiffies, start + 2*HZ)) {
-                               printk(KERN_ERR
-                               "Timeout waiting for mce inject %lx\n",
+                               pr_err("Timeout waiting for mce inject %lx\n",
                                        *cpumask_bits(mce_inject_cpumask));
                                break;
                        }
@@ -241,7 +240,7 @@ static int inject_init(void)
 {
        if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
                return -ENOMEM;
-       printk(KERN_INFO "Machine check injector initialized\n");
+       pr_info("Machine check injector initialized\n");
        register_mce_write_callback(mce_write);
        register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
                                "mce_notify");
index 12402e10aeffda428821dd6dcb88597203e48054..2a0717bf803372d3968dca9d2b2e6c81740d946d 100644 (file)
@@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
        rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
        rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
 
-       printk(KERN_EMERG
-               "CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
-               smp_processor_id(), loaddr, lotype);
+       pr_emerg("CPU#%d: Machine Check Exception:  0x%8X (type 0x%8X).\n",
+                smp_processor_id(), loaddr, lotype);
 
        if (lotype & (1<<5)) {
-               printk(KERN_EMERG
-                       "CPU#%d: Possible thermal failure (CPU on fire ?).\n",
-                       smp_processor_id());
+               pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n",
+                        smp_processor_id());
        }
 
        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
        /* Read registers before enabling: */
        rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
        rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
-       printk(KERN_INFO
-              "Intel old style machine check architecture supported.\n");
+       pr_info("Intel old style machine check architecture supported.\n");
 
        /* Enable MCE: */
        cr4_set_bits(X86_CR4_MCE);
-       printk(KERN_INFO
-              "Intel old style machine check reporting enabled on CPU#%d.\n",
-              smp_processor_id());
+       pr_info("Intel old style machine check reporting enabled on CPU#%d.\n",
+               smp_processor_id());
 }
index 2c5aaf8c2e2f3dcc94d348dfe91da6d7d5000ae9..0b445c2ff735d44fedc469fd8ce0c1b8b1f1633b 100644 (file)
@@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level)
        /* if we just entered the thermal event */
        if (new_event) {
                if (event == THERMAL_THROTTLING_EVENT)
-                       printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
+                       pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
                                this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package",
                                state->count);
@@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level)
        }
        if (old_event) {
                if (event == THERMAL_THROTTLING_EVENT)
-                       printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
-                               this_cpu,
+                       pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
                                level == CORE_LEVEL ? "Core" : "Package");
                return 1;
        }
@@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void)
 
 static void unexpected_thermal_interrupt(void)
 {
-       printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
-                       smp_processor_id());
+       pr_err("CPU%d: Unexpected LVT thermal interrupt!\n",
+               smp_processor_id());
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 
        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                if (system_state == SYSTEM_BOOTING)
-                       printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);
+                       pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
                return;
        }
 
@@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
        l = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 
-       printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
-                      tm2 ? "TM2" : "TM1");
+       pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+                     tm2 ? "TM2" : "TM1");
 
        /* enable thermal throttle processing */
        atomic_set(&therm_throt_en, 1);
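
One conversion in this file is arguably more than cosmetic: printk(KERN_DEBUG ...) always lands in the kernel log buffer (at debug loglevel), while pr_debug() compiles to a no-op unless DEBUG is defined for the file, or CONFIG_DYNAMIC_DEBUG is enabled and the call site is switched on at runtime. A hedged sketch of the difference, simplified from include/linux/printk.h:

        /* Always logged, at KERN_DEBUG level: */
        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu);

        /* Logged only when built with -DDEBUG, or enabled via dynamic
         * debug (CONFIG_DYNAMIC_DEBUG=y) at runtime; otherwise a no-op: */
        pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);

The same caveat applies to the KERN_DEBUG conversions in the MTRR cleanup code further down.
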
index 7245980186eea047e643af5010e1509bfc991632..fcf9ae9384f4cb4693d67cc8ee6433562dfc9f34 100644 (file)
@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-       printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-                        THRESHOLD_APIC_VECTOR);
+       pr_err("Unexpected threshold interrupt at vector %x\n",
+               THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
index 01dd8702880b7f2db9fcdc3874ba8809c2ce9b80..c6a722e1d011458fa30ec5ad3ad4e78c58d2fa82 100644 (file)
@@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
 {
        ist_enter(regs);
 
-       printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+       pr_emerg("CPU0: Machine Check Exception.\n");
        add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
        ist_exit(regs);
@@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
 
        cr4_set_bits(X86_CR4_MCE);
 
-       printk(KERN_INFO
-              "Winchip machine check reporting enabled on CPU#0.\n");
+       pr_info("Winchip machine check reporting enabled on CPU#0.\n");
 }
index 2233f8a766156891a52b9a7658f04efeaf4f86c8..75d3aab5f7b243623ca311c4c23bf07ef2e26ca4 100644 (file)
@@ -953,7 +953,7 @@ struct microcode_ops * __init init_amd_microcode(void)
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
        if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
-               pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
+               pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
                return NULL;
        }
 
index 20e242ea1bc46b5f5828c7b95071d920853b7609..4e7c6933691cc8de42aa4a82473482127bd0faab 100644 (file)
@@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void)
        ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
        ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-       printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
-              ms_hyperv.features, ms_hyperv.hints);
+       pr_info("HyperV: features 0x%x, hints 0x%x\n",
+               ms_hyperv.features, ms_hyperv.hints);
 
 #ifdef CONFIG_X86_LOCAL_APIC
        if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
@@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void)
                rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
                hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
                lapic_timer_frequency = hv_lapic_frequency;
-               printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n",
-                               lapic_timer_frequency);
+               pr_info("HyperV: LAPIC Timer Frequency: %#x\n",
+                       lapic_timer_frequency);
        }
 #endif
 
index 316fe3e60a9764e479d4e49d01346c7921763ee8..3d689937fc1b13974a0a8a5edd31ddda0f6401b2 100644 (file)
@@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
         */
        if (type != MTRR_TYPE_WRCOMB &&
            (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
-               pr_warning("mtrr: only write-combining%s supported\n",
+               pr_warn("mtrr: only write-combining%s supported\n",
                           centaur_mcr_type ? " and uncacheable are" : " is");
                return -EINVAL;
        }
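
The pr_warning() calls replaced here and in the surrounding files are the older spelling of pr_warn(); at the time of this merge both expanded to printk(KERN_WARNING pr_fmt(fmt), ...), with pr_warn() being the preferred name. Roughly (a simplified, possibly inexact rendering of printk.h from this era):

        #define pr_warning(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
        #define pr_warn              pr_warning

So the substitution changes spelling only, not loglevel or output.
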
index 0d98503c2245aab81283526c062f746650b99c32..31e951ce6dff33782c9610c7b43898181d59d637 100644 (file)
@@ -57,9 +57,9 @@ static int __initdata                         nr_range;
 static struct var_mtrr_range_state __initdata  range_state[RANGE_NUM];
 
 static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
 
-#define BIOS_BUG_MSG KERN_WARNING \
+#define BIOS_BUG_MSG \
        "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
 static int __init
@@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                                                base, base + size);
        }
        if (debug_print) {
-               printk(KERN_DEBUG "After WB checking\n");
+               pr_debug("After WB checking\n");
                for (i = 0; i < nr_range; i++)
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
        }
 
@@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                    (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
                    (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
                        /* Var MTRR contains UC entry below 1M? Skip it: */
-                       printk(BIOS_BUG_MSG, i);
+                       pr_warn(BIOS_BUG_MSG, i);
                        if (base + size <= (1<<(20-PAGE_SHIFT)))
                                continue;
                        size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
                                 extra_remove_base + extra_remove_size);
 
        if  (debug_print) {
-               printk(KERN_DEBUG "After UC checking\n");
+               pr_debug("After UC checking\n");
                for (i = 0; i < RANGE_NUM; i++) {
                        if (!range[i].end)
                                continue;
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
                }
        }
@@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
        /* sort the ranges */
        nr_range = clean_sort_range(range, RANGE_NUM);
        if  (debug_print) {
-               printk(KERN_DEBUG "After sorting\n");
+               pr_debug("After sorting\n");
                for (i = 0; i < nr_range; i++)
-                       printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
+                       pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
                                 range[i].start, range[i].end);
        }
 
@@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void)
                start_base = to_size_factor(start_base, &start_factor),
                type = range_state[i].type;
 
-               printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+               pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
                        i, start_base, start_factor,
                        size_base, size_factor,
                        (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                return 0;
 
        /* Print original var MTRRs at first, for debugging: */
-       printk(KERN_DEBUG "original variable MTRRs\n");
+       pr_debug("original variable MTRRs\n");
        print_out_mtrr_range_state();
 
        memset(range, 0, sizeof(range));
@@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                                          x_remove_base, x_remove_size);
 
        range_sums = sum_ranges(range, nr_range);
-       printk(KERN_INFO "total RAM covered: %ldM\n",
+       pr_info("total RAM covered: %ldM\n",
               range_sums >> (20 - PAGE_SHIFT));
 
        if (mtrr_chunk_size && mtrr_gran_size) {
@@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits)
 
                if (!result[i].bad) {
                        set_var_mtrr_all(address_bits);
-                       printk(KERN_DEBUG "New variable MTRRs\n");
+                       pr_debug("New variable MTRRs\n");
                        print_out_mtrr_range_state();
                        return 1;
                }
-               printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
-                      "will find optimal one\n");
+               pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n");
        }
 
        i = 0;
@@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                                      x_remove_base, x_remove_size, i);
                        if (debug_print) {
                                mtrr_print_out_one_result(i);
-                               printk(KERN_INFO "\n");
+                               pr_info("\n");
                        }
 
                        i++;
@@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits)
        index_good = mtrr_search_optimal_index();
 
        if (index_good != -1) {
-               printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+               pr_info("Found optimal setting for mtrr clean up\n");
                i = index_good;
                mtrr_print_out_one_result(i);
 
@@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits)
                gran_size <<= 10;
                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
                set_var_mtrr_all(address_bits);
-               printk(KERN_DEBUG "New variable MTRRs\n");
+               pr_debug("New variable MTRRs\n");
                print_out_mtrr_range_state();
                return 1;
        } else {
@@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits)
                        mtrr_print_out_one_result(i);
        }
 
-       printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
-       printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+       pr_info("mtrr_cleanup: can not find optimal value\n");
+       pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n");
 
        return 0;
 }
@@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 
        /* kvm/qemu doesn't have mtrr set right, don't trim them all: */
        if (!highest_pfn) {
-               printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
+               pr_info("CPU MTRRs all blank - virtualized system.\n");
                return 0;
        }
 
@@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
                                                         end_pfn);
 
        if (total_trim_size) {
-               pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
+               pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
+                       total_trim_size >> 20);
 
                if (!changed_by_mtrr_cleanup)
                        WARN_ON(1);
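
A detail worth calling out in the mtrr/cleanup.c hunks above: BIOS_BUG_MSG previously carried its own KERN_WARNING prefix, so call sites handed it straight to printk(). With the prefix dropped from the macro, each call site now picks the level itself, e.g. pr_warn(BIOS_BUG_MSG, i). A minimal before/after sketch using a hypothetical MY_MSG macro rather than the kernel's:

        /* Before: loglevel baked into the format string */
        #define MY_MSG KERN_WARNING "VAR MTRR %d contains strange UC entry\n"
        printk(MY_MSG, i);

        /* After: plain format string, level chosen per call site */
        #define MY_MSG "VAR MTRR %d contains strange UC entry\n"
        pr_warn(MY_MSG, i);

This keeps the message text in one place while making the severity explicit at the call site.
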
index c870af1610083ec3dda7cb61b966860c9a224374..fcbcb2f678ca47360d23623887d216165969d6e8 100644 (file)
@@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
 
        rdmsr(MSR_K8_SYSCFG, lo, hi);
        if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
-               printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
+               pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
                       " not cleared by BIOS, clearing this bit\n",
                       smp_processor_id());
                lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
@@ -501,14 +501,14 @@ void __init mtrr_state_warn(void)
        if (!mask)
                return;
        if (mask & MTRR_CHANGE_MASK_FIXED)
-               pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
        if (mask & MTRR_CHANGE_MASK_VARIABLE)
-               pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n");
        if (mask & MTRR_CHANGE_MASK_DEFTYPE)
-               pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+               pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
 
-       printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
-       printk(KERN_INFO "mtrr: corrected configuration.\n");
+       pr_info("mtrr: probably your BIOS does not setup all CPUs.\n");
+       pr_info("mtrr: corrected configuration.\n");
 }
 
 /*
@@ -519,8 +519,7 @@ void __init mtrr_state_warn(void)
 void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 {
        if (wrmsr_safe(msr, a, b) < 0) {
-               printk(KERN_ERR
-                       "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
+               pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
                        smp_processor_id(), msr, a, b);
        }
 }
@@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
                tmp |= ~((1ULL<<(hi - 1)) - 1);
 
                if (tmp != mask) {
-                       printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+                       pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
                        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
                        mask = tmp;
                }
@@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
            boot_cpu_data.x86_model == 1 &&
            boot_cpu_data.x86_mask <= 7) {
                if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
-                       pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+                       pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
                        return -EINVAL;
                }
                if (!(base + size < 0x70000 || base > 0x7003F) &&
                    (type == MTRR_TYPE_WRCOMB
                     || type == MTRR_TYPE_WRBACK)) {
-                       pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+                       pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
                        return -EINVAL;
                }
        }
@@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
             lbase = lbase >> 1, last = last >> 1)
                ;
        if (lbase != last) {
-               pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
+               pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
                return -EINVAL;
        }
        return 0;
index 5c3d149ee91cb1f6c87ff6ad1853a38adfce82da..ba80d68f683e72727d455cb36a3d0dc8493a5e25 100644 (file)
@@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                return error;
 
        if (type >= MTRR_NUM_TYPES) {
-               pr_warning("mtrr: type: %u invalid\n", type);
+               pr_warn("mtrr: type: %u invalid\n", type);
                return -EINVAL;
        }
 
        /* If the type is WC, check that this processor supports it */
        if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-               pr_warning("mtrr: your processor doesn't support write-combining\n");
+               pr_warn("mtrr: your processor doesn't support write-combining\n");
                return -ENOSYS;
        }
 
        if (!size) {
-               pr_warning("mtrr: zero sized request\n");
+               pr_warn("mtrr: zero sized request\n");
                return -EINVAL;
        }
 
        if ((base | (base + size - 1)) >>
            (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
-               pr_warning("mtrr: base or size exceeds the MTRR width\n");
+               pr_warn("mtrr: base or size exceeds the MTRR width\n");
                return -EINVAL;
        }
 
@@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                                } else if (types_compatible(type, ltype))
                                        continue;
                        }
-                       pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+                       pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
                                " 0x%lx000,0x%lx000\n", base, size, lbase,
                                lsize);
                        goto out;
@@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
                if (ltype != type) {
                        if (types_compatible(type, ltype))
                                continue;
-                       pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+                       pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
                                base, size, mtrr_attrib_to_str(ltype),
                                mtrr_attrib_to_str(type));
                        goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
 static int mtrr_check(unsigned long base, unsigned long size)
 {
        if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-               pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+               pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
                pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
                dump_stack();
                return -1;
@@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
                }
        }
        if (reg >= max) {
-               pr_warning("mtrr: register: %d too big\n", reg);
+               pr_warn("mtrr: register: %d too big\n", reg);
                goto out;
        }
        mtrr_if->get(reg, &lbase, &lsize, &ltype);
        if (lsize < 1) {
-               pr_warning("mtrr: MTRR %d not used\n", reg);
+               pr_warn("mtrr: MTRR %d not used\n", reg);
                goto out;
        }
        if (mtrr_usage_table[reg] < 1) {
-               pr_warning("mtrr: reg: %d has count=0\n", reg);
+               pr_warn("mtrr: reg: %d has count=0\n", reg);
                goto out;
        }
        if (--mtrr_usage_table[reg] < 1)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
deleted file mode 100644 (file)
index 1b443db..0000000
+++ /dev/null
@@ -1,2428 +0,0 @@
-/*
- * Performance events x86 architecture code
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- *  Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/capability.h>
-#include <linux/notifier.h>
-#include <linux/hardirq.h>
-#include <linux/kprobes.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/bitops.h>
-#include <linux/device.h>
-
-#include <asm/apic.h>
-#include <asm/stacktrace.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/alternative.h>
-#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
-#include <asm/timer.h>
-#include <asm/desc.h>
-#include <asm/ldt.h>
-
-#include "perf_event.h"
-
-struct x86_pmu x86_pmu __read_mostly;
-
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
-       .enabled = 1,
-};
-
-struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
-
-u64 __read_mostly hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-u64 __read_mostly hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-/*
- * Propagate event elapsed time into the generic event.
- * Can only be executed on the CPU where the event is active.
- * Returns the delta events processed.
- */
-u64 x86_perf_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int shift = 64 - x86_pmu.cntval_bits;
-       u64 prev_raw_count, new_raw_count;
-       int idx = hwc->idx;
-       s64 delta;
-
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
-               return 0;
-
-       /*
-        * Careful: an NMI might modify the previous event value.
-        *
-        * Our tactic to handle this is to first atomically read and
-        * exchange a new raw count - then add that new-prev delta
-        * count to the generic event atomically:
-        */
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
-               goto again;
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-
-       return new_raw_count;
-}
-
-/*
- * Find and validate any extra registers to set up.
- */
-static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-       struct extra_reg *er;
-
-       reg = &event->hw.extra_reg;
-
-       if (!x86_pmu.extra_regs)
-               return 0;
-
-       for (er = x86_pmu.extra_regs; er->msr; er++) {
-               if (er->event != (config & er->config_mask))
-                       continue;
-               if (event->attr.config1 & ~er->valid_mask)
-                       return -EINVAL;
-               /* Check if the extra msrs can be safely accessed*/
-               if (!er->extra_msr_access)
-                       return -ENXIO;
-
-               reg->idx = er->idx;
-               reg->config = event->attr.config1;
-               reg->reg = er->msr;
-               break;
-       }
-       return 0;
-}
-
-static atomic_t active_events;
-static atomic_t pmc_refcount;
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-static bool reserve_pmc_hardware(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
-                       goto perfctr_fail;
-       }
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
-                       goto eventsel_fail;
-       }
-
-       return true;
-
-eventsel_fail:
-       for (i--; i >= 0; i--)
-               release_evntsel_nmi(x86_pmu_config_addr(i));
-
-       i = x86_pmu.num_counters;
-
-perfctr_fail:
-       for (i--; i >= 0; i--)
-               release_perfctr_nmi(x86_pmu_event_addr(i));
-
-       return false;
-}
-
-static void release_pmc_hardware(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               release_perfctr_nmi(x86_pmu_event_addr(i));
-               release_evntsel_nmi(x86_pmu_config_addr(i));
-       }
-}
-
-#else
-
-static bool reserve_pmc_hardware(void) { return true; }
-static void release_pmc_hardware(void) {}
-
-#endif
-
-static bool check_hw_exists(void)
-{
-       u64 val, val_fail, val_new= ~0;
-       int i, reg, reg_fail, ret = 0;
-       int bios_fail = 0;
-       int reg_safe = -1;
-
-       /*
-        * Check to see if the BIOS enabled any of the counters, if so
-        * complain and bail.
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               reg = x86_pmu_config_addr(i);
-               ret = rdmsrl_safe(reg, &val);
-               if (ret)
-                       goto msr_fail;
-               if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
-                       bios_fail = 1;
-                       val_fail = val;
-                       reg_fail = reg;
-               } else {
-                       reg_safe = i;
-               }
-       }
-
-       if (x86_pmu.num_counters_fixed) {
-               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               ret = rdmsrl_safe(reg, &val);
-               if (ret)
-                       goto msr_fail;
-               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
-                       if (val & (0x03 << i*4)) {
-                               bios_fail = 1;
-                               val_fail = val;
-                               reg_fail = reg;
-                       }
-               }
-       }
-
-       /*
-        * If all the counters are enabled, the below test will always
-        * fail.  The tools will also become useless in this scenario.
-        * Just fail and disable the hardware counters.
-        */
-
-       if (reg_safe == -1) {
-               reg = reg_safe;
-               goto msr_fail;
-       }
-
-       /*
-        * Read the current value, change it and read it back to see if it
-        * matches, this is needed to detect certain hardware emulators
-        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
-        */
-       reg = x86_pmu_event_addr(reg_safe);
-       if (rdmsrl_safe(reg, &val))
-               goto msr_fail;
-       val ^= 0xffffUL;
-       ret = wrmsrl_safe(reg, val);
-       ret |= rdmsrl_safe(reg, &val_new);
-       if (ret || val != val_new)
-               goto msr_fail;
-
-       /*
-        * We still allow the PMU driver to operate:
-        */
-       if (bios_fail) {
-               printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
-               printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
-       }
-
-       return true;
-
-msr_fail:
-       printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-       printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
-               boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
-               reg, val_new);
-
-       return false;
-}
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
-       x86_release_hardware();
-       atomic_dec(&active_events);
-}
-
-void hw_perf_lbr_event_destroy(struct perf_event *event)
-{
-       hw_perf_event_destroy(event);
-
-       /* undo the lbr/bts event accounting */
-       x86_del_exclusive(x86_lbr_exclusive_lbr);
-}
-
-static inline int x86_pmu_initialized(void)
-{
-       return x86_pmu.handle_irq != NULL;
-}
-
-static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
-{
-       struct perf_event_attr *attr = &event->attr;
-       unsigned int cache_type, cache_op, cache_result;
-       u64 config, val;
-
-       config = attr->config;
-
-       cache_type = (config >>  0) & 0xff;
-       if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-               return -EINVAL;
-
-       cache_op = (config >>  8) & 0xff;
-       if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-               return -EINVAL;
-
-       cache_result = (config >> 16) & 0xff;
-       if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-               return -EINVAL;
-
-       val = hw_cache_event_ids[cache_type][cache_op][cache_result];
-
-       if (val == 0)
-               return -ENOENT;
-
-       if (val == -1)
-               return -EINVAL;
-
-       hwc->config |= val;
-       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
-       return x86_pmu_extra_regs(val, event);
-}
-
-int x86_reserve_hardware(void)
-{
-       int err = 0;
-
-       if (!atomic_inc_not_zero(&pmc_refcount)) {
-               mutex_lock(&pmc_reserve_mutex);
-               if (atomic_read(&pmc_refcount) == 0) {
-                       if (!reserve_pmc_hardware())
-                               err = -EBUSY;
-                       else
-                               reserve_ds_buffers();
-               }
-               if (!err)
-                       atomic_inc(&pmc_refcount);
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-
-       return err;
-}
-
-void x86_release_hardware(void)
-{
-       if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
-               release_pmc_hardware();
-               release_ds_buffers();
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-}
-
-/*
- * Check if we can create event of a certain type (that no conflicting events
- * are present).
- */
-int x86_add_exclusive(unsigned int what)
-{
-       int i;
-
-       if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
-               mutex_lock(&pmc_reserve_mutex);
-               for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
-                       if (i != what && atomic_read(&x86_pmu.lbr_exclusive[i]))
-                               goto fail_unlock;
-               }
-               atomic_inc(&x86_pmu.lbr_exclusive[what]);
-               mutex_unlock(&pmc_reserve_mutex);
-       }
-
-       atomic_inc(&active_events);
-       return 0;
-
-fail_unlock:
-       mutex_unlock(&pmc_reserve_mutex);
-       return -EBUSY;
-}
-
-void x86_del_exclusive(unsigned int what)
-{
-       atomic_dec(&x86_pmu.lbr_exclusive[what]);
-       atomic_dec(&active_events);
-}
-
-int x86_setup_perfctr(struct perf_event *event)
-{
-       struct perf_event_attr *attr = &event->attr;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 config;
-
-       if (!is_sampling_event(event)) {
-               hwc->sample_period = x86_pmu.max_period;
-               hwc->last_period = hwc->sample_period;
-               local64_set(&hwc->period_left, hwc->sample_period);
-       }
-
-       if (attr->type == PERF_TYPE_RAW)
-               return x86_pmu_extra_regs(event->attr.config, event);
-
-       if (attr->type == PERF_TYPE_HW_CACHE)
-               return set_ext_hw_attr(hwc, event);
-
-       if (attr->config >= x86_pmu.max_events)
-               return -EINVAL;
-
-       /*
-        * The generic map:
-        */
-       config = x86_pmu.event_map(attr->config);
-
-       if (config == 0)
-               return -ENOENT;
-
-       if (config == -1LL)
-               return -EINVAL;
-
-       /*
-        * Branch tracing:
-        */
-       if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-           !attr->freq && hwc->sample_period == 1) {
-               /* BTS is not supported by this architecture. */
-               if (!x86_pmu.bts_active)
-                       return -EOPNOTSUPP;
-
-               /* BTS is currently only allowed for user-mode. */
-               if (!attr->exclude_kernel)
-                       return -EOPNOTSUPP;
-
-               /* disallow bts if conflicting events are present */
-               if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-                       return -EBUSY;
-
-               event->destroy = hw_perf_lbr_event_destroy;
-       }
-
-       hwc->config |= config;
-
-       return 0;
-}
-
-/*
- * check that branch_sample_type is compatible with
- * settings needed for precise_ip > 1 which implies
- * using the LBR to capture ALL taken branches at the
- * priv levels of the measurement
- */
-static inline int precise_br_compat(struct perf_event *event)
-{
-       u64 m = event->attr.branch_sample_type;
-       u64 b = 0;
-
-       /* must capture all branches */
-       if (!(m & PERF_SAMPLE_BRANCH_ANY))
-               return 0;
-
-       m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
-
-       if (!event->attr.exclude_user)
-               b |= PERF_SAMPLE_BRANCH_USER;
-
-       if (!event->attr.exclude_kernel)
-               b |= PERF_SAMPLE_BRANCH_KERNEL;
-
-       /*
-        * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
-        */
-
-       return m == b;
-}
-
-int x86_pmu_hw_config(struct perf_event *event)
-{
-       if (event->attr.precise_ip) {
-               int precise = 0;
-
-               /* Support for constant skid */
-               if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
-                       precise++;
-
-                       /* Support for IP fixup */
-                       if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
-                               precise++;
-
-                       if (x86_pmu.pebs_prec_dist)
-                               precise++;
-               }
-
-               if (event->attr.precise_ip > precise)
-                       return -EOPNOTSUPP;
-       }
-       /*
-        * check that PEBS LBR correction does not conflict with
-        * whatever the user is asking with attr->branch_sample_type
-        */
-       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format < 2) {
-               u64 *br_type = &event->attr.branch_sample_type;
-
-               if (has_branch_stack(event)) {
-                       if (!precise_br_compat(event))
-                               return -EOPNOTSUPP;
-
-                       /* branch_sample_type is compatible */
-
-               } else {
-                       /*
-                        * user did not specify  branch_sample_type
-                        *
-                        * For PEBS fixups, we capture all
-                        * the branches at the priv level of the
-                        * event.
-                        */
-                       *br_type = PERF_SAMPLE_BRANCH_ANY;
-
-                       if (!event->attr.exclude_user)
-                               *br_type |= PERF_SAMPLE_BRANCH_USER;
-
-                       if (!event->attr.exclude_kernel)
-                               *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
-               }
-       }
-
-       if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
-               event->attach_state |= PERF_ATTACH_TASK_DATA;
-
-       /*
-        * Generate PMC IRQs:
-        * (keep 'enabled' bit clear for now)
-        */
-       event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
-
-       /*
-        * Count user and OS events unless requested not to
-        */
-       if (!event->attr.exclude_user)
-               event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
-       if (!event->attr.exclude_kernel)
-               event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
-
-       if (event->attr.type == PERF_TYPE_RAW)
-               event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
-
-       if (event->attr.sample_period && x86_pmu.limit_period) {
-               if (x86_pmu.limit_period(event, event->attr.sample_period) >
-                               event->attr.sample_period)
-                       return -EINVAL;
-       }
-
-       return x86_setup_perfctr(event);
-}
-
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __x86_pmu_event_init(struct perf_event *event)
-{
-       int err;
-
-       if (!x86_pmu_initialized())
-               return -ENODEV;
-
-       err = x86_reserve_hardware();
-       if (err)
-               return err;
-
-       atomic_inc(&active_events);
-       event->destroy = hw_perf_event_destroy;
-
-       event->hw.idx = -1;
-       event->hw.last_cpu = -1;
-       event->hw.last_tag = ~0ULL;
-
-       /* mark unused */
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
-       return x86_pmu.hw_config(event);
-}
-
-void x86_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               u64 val;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               rdmsrl(x86_pmu_config_addr(idx), val);
-               if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
-                       continue;
-               val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-               wrmsrl(x86_pmu_config_addr(idx), val);
-       }
-}
-
-static void x86_pmu_disable(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (!x86_pmu_initialized())
-               return;
-
-       if (!cpuc->enabled)
-               return;
-
-       cpuc->n_added = 0;
-       cpuc->enabled = 0;
-       barrier();
-
-       x86_pmu.disable_all();
-}
-
-void x86_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-
-               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-       }
-}
-
-static struct pmu pmu;
-
-static inline int is_x86_event(struct perf_event *event)
-{
-       return event->pmu == &pmu;
-}
-
-/*
- * Event scheduler state:
- *
- * Assign events iterating over all events and counters, beginning
- * with events with least weights first. Keep the current iterator
- * state in struct sched_state.
- */
-struct sched_state {
-       int     weight;
-       int     event;          /* event index */
-       int     counter;        /* counter index */
-       int     unassigned;     /* number of events to be assigned left */
-       int     nr_gp;          /* number of GP counters used */
-       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-};
-
-/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
-#define        SCHED_STATES_MAX        2
-
-struct perf_sched {
-       int                     max_weight;
-       int                     max_events;
-       int                     max_gp;
-       int                     saved_states;
-       struct event_constraint **constraints;
-       struct sched_state      state;
-       struct sched_state      saved[SCHED_STATES_MAX];
-};
-
-/*
- * Initialize interator that runs through all events and counters.
- */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
-                           int num, int wmin, int wmax, int gpmax)
-{
-       int idx;
-
-       memset(sched, 0, sizeof(*sched));
-       sched->max_events       = num;
-       sched->max_weight       = wmax;
-       sched->max_gp           = gpmax;
-       sched->constraints      = constraints;
-
-       for (idx = 0; idx < num; idx++) {
-               if (constraints[idx]->weight == wmin)
-                       break;
-       }
-
-       sched->state.event      = idx;          /* start with min weight */
-       sched->state.weight     = wmin;
-       sched->state.unassigned = num;
-}
-
-static void perf_sched_save_state(struct perf_sched *sched)
-{
-       if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
-               return;
-
-       sched->saved[sched->saved_states] = sched->state;
-       sched->saved_states++;
-}
-
-static bool perf_sched_restore_state(struct perf_sched *sched)
-{
-       if (!sched->saved_states)
-               return false;
-
-       sched->saved_states--;
-       sched->state = sched->saved[sched->saved_states];
-
-       /* continue with next counter: */
-       clear_bit(sched->state.counter++, sched->state.used);
-
-       return true;
-}
-
-/*
- * Select a counter for the current event to schedule. Return true on
- * success.
- */
-static bool __perf_sched_find_counter(struct perf_sched *sched)
-{
-       struct event_constraint *c;
-       int idx;
-
-       if (!sched->state.unassigned)
-               return false;
-
-       if (sched->state.event >= sched->max_events)
-               return false;
-
-       c = sched->constraints[sched->state.event];
-       /* Prefer fixed purpose counters */
-       if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
-               idx = INTEL_PMC_IDX_FIXED;
-               for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
-                       if (!__test_and_set_bit(idx, sched->state.used))
-                               goto done;
-               }
-       }
-
-       /* Grab the first unused counter starting with idx */
-       idx = sched->state.counter;
-       for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
-               if (!__test_and_set_bit(idx, sched->state.used)) {
-                       if (sched->state.nr_gp++ >= sched->max_gp)
-                               return false;
-
-                       goto done;
-               }
-       }
-
-       return false;
-
-done:
-       sched->state.counter = idx;
-
-       if (c->overlap)
-               perf_sched_save_state(sched);
-
-       return true;
-}
-
-static bool perf_sched_find_counter(struct perf_sched *sched)
-{
-       while (!__perf_sched_find_counter(sched)) {
-               if (!perf_sched_restore_state(sched))
-                       return false;
-       }
-
-       return true;
-}
-
-/*
- * Go through all unassigned events and find the next one to schedule.
- * Take events with the least weight first. Return true on success.
- */
-static bool perf_sched_next_event(struct perf_sched *sched)
-{
-       struct event_constraint *c;
-
-       if (!sched->state.unassigned || !--sched->state.unassigned)
-               return false;
-
-       do {
-               /* next event */
-               sched->state.event++;
-               if (sched->state.event >= sched->max_events) {
-                       /* next weight */
-                       sched->state.event = 0;
-                       sched->state.weight++;
-                       if (sched->state.weight > sched->max_weight)
-                               return false;
-               }
-               c = sched->constraints[sched->state.event];
-       } while (c->weight != sched->state.weight);
-
-       sched->state.counter = 0;       /* start with first counter */
-
-       return true;
-}
-
-/*
- * Assign a counter for each event.
- */
-int perf_assign_events(struct event_constraint **constraints, int n,
-                       int wmin, int wmax, int gpmax, int *assign)
-{
-       struct perf_sched sched;
-
-       perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);
-
-       do {
-               if (!perf_sched_find_counter(&sched))
-                       break;  /* failed */
-               if (assign)
-                       assign[sched.state.event] = sched.state.counter;
-       } while (perf_sched_next_event(&sched));
-
-       return sched.state.unassigned;
-}
-EXPORT_SYMBOL_GPL(perf_assign_events);
-
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
-       struct event_constraint *c;
-       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       struct perf_event *e;
-       int i, wmin, wmax, unsched = 0;
-       struct hw_perf_event *hwc;
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-       if (x86_pmu.start_scheduling)
-               x86_pmu.start_scheduling(cpuc);
-
-       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               cpuc->event_constraint[i] = NULL;
-               c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
-               cpuc->event_constraint[i] = c;
-
-               wmin = min(wmin, c->weight);
-               wmax = max(wmax, c->weight);
-       }
-
-       /*
-        * fastpath, try to reuse previous register
-        */
-       for (i = 0; i < n; i++) {
-               hwc = &cpuc->event_list[i]->hw;
-               c = cpuc->event_constraint[i];
-
-               /* never assigned */
-               if (hwc->idx == -1)
-                       break;
-
-               /* constraint still honored */
-               if (!test_bit(hwc->idx, c->idxmsk))
-                       break;
-
-               /* not already used */
-               if (test_bit(hwc->idx, used_mask))
-                       break;
-
-               __set_bit(hwc->idx, used_mask);
-               if (assign)
-                       assign[i] = hwc->idx;
-       }
-
-       /* slow path */
-       if (i != n) {
-               int gpmax = x86_pmu.num_counters;
-
-               /*
-                * Do not allow scheduling of more than half the available
-                * generic counters.
-                *
-                * This helps avoid counter starvation of sibling thread by
-                * ensuring at most half the counters cannot be in exclusive
-                * mode. There is no designated counters for the limits. Any
-                * N/2 counters can be used. This helps with events with
-                * specific counter constraints.
-                */
-               if (is_ht_workaround_enabled() && !cpuc->is_fake &&
-                   READ_ONCE(cpuc->excl_cntrs->exclusive_present))
-                       gpmax /= 2;
-
-               unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
-                                            wmax, gpmax, assign);
-       }
-
-       /*
-        * In case of success (unsched = 0), mark events as committed,
-        * so we do not put_constraint() in case new events are added
-        * and fail to be scheduled
-        *
-        * We invoke the lower level commit callback to lock the resource
-        *
-        * We do not need to do all of this in case we are called to
-        * validate an event group (assign == NULL)
-        */
-       if (!unsched && assign) {
-               for (i = 0; i < n; i++) {
-                       e = cpuc->event_list[i];
-                       e->hw.flags |= PERF_X86_EVENT_COMMITTED;
-                       if (x86_pmu.commit_scheduling)
-                               x86_pmu.commit_scheduling(cpuc, i, assign[i]);
-               }
-       } else {
-               for (i = 0; i < n; i++) {
-                       e = cpuc->event_list[i];
-                       /*
-                        * do not put_constraint() on comitted events,
-                        * because they are good to go
-                        */
-                       if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
-                               continue;
-
-                       /*
-                        * release events that failed scheduling
-                        */
-                       if (x86_pmu.put_event_constraints)
-                               x86_pmu.put_event_constraints(cpuc, e);
-               }
-       }
-
-       if (x86_pmu.stop_scheduling)
-               x86_pmu.stop_scheduling(cpuc);
-
-       return unsched ? -EINVAL : 0;
-}
-
-/*
- * dogrp: true if must collect siblings events (group)
- * returns total number of events and error code
- */
-static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
-{
-       struct perf_event *event;
-       int n, max_count;
-
-       max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
-
-       /* current number of events already accepted */
-       n = cpuc->n_events;
-
-       if (is_x86_event(leader)) {
-               if (n >= max_count)
-                       return -EINVAL;
-               cpuc->event_list[n] = leader;
-               n++;
-       }
-       if (!dogrp)
-               return n;
-
-       list_for_each_entry(event, &leader->sibling_list, group_entry) {
-               if (!is_x86_event(event) ||
-                   event->state <= PERF_EVENT_STATE_OFF)
-                       continue;
-
-               if (n >= max_count)
-                       return -EINVAL;
-
-               cpuc->event_list[n] = event;
-               n++;
-       }
-       return n;
-}
-
-static inline void x86_assign_hw_event(struct perf_event *event,
-                               struct cpu_hw_events *cpuc, int i)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       hwc->idx = cpuc->assign[i];
-       hwc->last_cpu = smp_processor_id();
-       hwc->last_tag = ++cpuc->tags[i];
-
-       if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
-               hwc->config_base = 0;
-               hwc->event_base = 0;
-       } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
-               hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-               hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
-               hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
-       } else {
-               hwc->config_base = x86_pmu_config_addr(hwc->idx);
-               hwc->event_base  = x86_pmu_event_addr(hwc->idx);
-               hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
-       }
-}
-
-static inline int match_prev_assignment(struct hw_perf_event *hwc,
-                                       struct cpu_hw_events *cpuc,
-                                       int i)
-{
-       return hwc->idx == cpuc->assign[i] &&
-               hwc->last_cpu == smp_processor_id() &&
-               hwc->last_tag == cpuc->tags[i];
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags);
-
-static void x86_pmu_enable(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_event *event;
-       struct hw_perf_event *hwc;
-       int i, added = cpuc->n_added;
-
-       if (!x86_pmu_initialized())
-               return;
-
-       if (cpuc->enabled)
-               return;
-
-       if (cpuc->n_added) {
-               int n_running = cpuc->n_events - cpuc->n_added;
-               /*
-                * apply assignment obtained either from
-                * hw_perf_group_sched_in() or x86_pmu_enable()
-                *
-                * step1: save events moving to new counters
-                */
-               for (i = 0; i < n_running; i++) {
-                       event = cpuc->event_list[i];
-                       hwc = &event->hw;
-
-                       /*
-                        * we can avoid reprogramming counter if:
-                        * - assigned same counter as last time
-                        * - running on same CPU as last time
-                        * - no other event has used the counter since
-                        */
-                       if (hwc->idx == -1 ||
-                           match_prev_assignment(hwc, cpuc, i))
-                               continue;
-
-                       /*
-                        * Ensure we don't accidentally enable a stopped
-                        * counter simply because we rescheduled.
-                        */
-                       if (hwc->state & PERF_HES_STOPPED)
-                               hwc->state |= PERF_HES_ARCH;
-
-                       x86_pmu_stop(event, PERF_EF_UPDATE);
-               }
-
-               /*
-                * step2: reprogram moved events into new counters
-                */
-               for (i = 0; i < cpuc->n_events; i++) {
-                       event = cpuc->event_list[i];
-                       hwc = &event->hw;
-
-                       if (!match_prev_assignment(hwc, cpuc, i))
-                               x86_assign_hw_event(event, cpuc, i);
-                       else if (i < n_running)
-                               continue;
-
-                       if (hwc->state & PERF_HES_ARCH)
-                               continue;
-
-                       x86_pmu_start(event, PERF_EF_RELOAD);
-               }
-               cpuc->n_added = 0;
-               perf_events_lapic_init();
-       }
-
-       cpuc->enabled = 1;
-       barrier();
-
-       x86_pmu.enable_all(added);
-}
-
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
-
-/*
- * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the event disabled in hw:
- */
-int x86_perf_event_set_period(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int ret = 0, idx = hwc->idx;
-
-       if (idx == INTEL_PMC_IDX_FIXED_BTS)
-               return 0;
-
-       /*
-        * If we are way outside a reasonable range then just skip forward:
-        */
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-
-       if (unlikely(left <= 0)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               ret = 1;
-       }
-       /*
-        * Quirk: certain CPUs dont like it if just 1 hw_event is left:
-        */
-       if (unlikely(left < 2))
-               left = 2;
-
-       if (left > x86_pmu.max_period)
-               left = x86_pmu.max_period;
-
-       if (x86_pmu.limit_period)
-               left = x86_pmu.limit_period(event, left);
-
-       per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
-
-       if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
-           local64_read(&hwc->prev_count) != (u64)-left) {
-               /*
-                * The hw event starts counting from this event offset,
-                * mark it to be able to extract future deltas:
-                */
-               local64_set(&hwc->prev_count, (u64)-left);
-
-               wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
-       }
-
-       /*
-        * Due to an erratum on certain CPUs we need
-        * a second write to be sure the register
-        * is updated properly.
-        */
-       if (x86_pmu.perfctr_second_write) {
-               wrmsrl(hwc->event_base,
-                       (u64)(-left) & x86_pmu.cntval_mask);
-       }
-
-       perf_event_update_userpage(event);
-
-       return ret;
-}
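-
-/*
- * Minimal illustrative sketch (hypothetical helper, not a kernel API):
- * the sampling period is realized in hardware by programming the counter
- * with the two's complement of 'left', masked to the counter width, so
- * that it overflows (and raises a PMI) after exactly 'left' increments.
- */
-static inline u64 example_reload_value(u64 left, int cntval_bits)
-{
-       u64 cntval_mask = (1ULL << cntval_bits) - 1;
-
-       /* e.g. left = 0x1000 with 48-bit counters -> 0xfffffffff000 */
-       return (u64)(-left) & cntval_mask;
-}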
-
-void x86_pmu_enable_event(struct perf_event *event)
-{
-       if (__this_cpu_read(cpu_hw_events.enabled))
-               __x86_pmu_enable_event(&event->hw,
-                                      ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Add a single event to the PMU.
- *
- * The event is added to the group of enabled events
- * but only if it can be scheduled with existing events.
- */
-static int x86_pmu_add(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc;
-       int assign[X86_PMC_IDX_MAX];
-       int n, n0, ret;
-
-       hwc = &event->hw;
-
-       n0 = cpuc->n_events;
-       ret = n = collect_events(cpuc, event, false);
-       if (ret < 0)
-               goto out;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       /*
-        * If group events scheduling transaction was started,
-        * skip the schedulability test here, it will be performed
-        * at commit time (->commit_txn) as a whole.
-        */
-       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-               goto done_collect;
-
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
-       if (ret)
-               goto out;
-       /*
-        * Copy the new assignment now that we know it is possible;
-        * it will be used by hw_perf_enable().
-        */
-       memcpy(cpuc->assign, assign, n*sizeof(int));
-
-done_collect:
-       /*
-        * Commit the collect_events() state. See x86_pmu_del() and
-        * x86_pmu_*_txn().
-        */
-       cpuc->n_events = n;
-       cpuc->n_added += n - n0;
-       cpuc->n_txn += n - n0;
-
-       ret = 0;
-out:
-       return ret;
-}
-
-static void x86_pmu_start(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx = event->hw.idx;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       if (WARN_ON_ONCE(idx == -1))
-               return;
-
-       if (flags & PERF_EF_RELOAD) {
-               WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-               x86_perf_event_set_period(event);
-       }
-
-       event->hw.state = 0;
-
-       cpuc->events[idx] = event;
-       __set_bit(idx, cpuc->active_mask);
-       __set_bit(idx, cpuc->running);
-       x86_pmu.enable(event);
-       perf_event_update_userpage(event);
-}
-
-void perf_event_print_debug(void)
-{
-       u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-       u64 pebs, debugctl;
-       struct cpu_hw_events *cpuc;
-       unsigned long flags;
-       int cpu, idx;
-
-       if (!x86_pmu.num_counters)
-               return;
-
-       local_irq_save(flags);
-
-       cpu = smp_processor_id();
-       cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       if (x86_pmu.version >= 2) {
-               rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
-               rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-               rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
-               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
-
-               pr_info("\n");
-               pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
-               pr_info("CPU#%d: status:     %016llx\n", cpu, status);
-               pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
-               pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
-               if (x86_pmu.pebs_constraints) {
-                       rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
-                       pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
-               }
-               if (x86_pmu.lbr_nr) {
-                       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-                       pr_info("CPU#%d: debugctl:   %016llx\n", cpu, debugctl);
-               }
-       }
-       pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
-               rdmsrl(x86_pmu_event_addr(idx), pmc_count);
-
-               prev_left = per_cpu(pmc_prev_left[idx], cpu);
-
-               pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
-                       cpu, idx, pmc_ctrl);
-               pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
-                       cpu, idx, pmc_count);
-               pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
-                       cpu, idx, prev_left);
-       }
-       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
-               rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
-
-               pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
-                       cpu, idx, pmc_count);
-       }
-       local_irq_restore(flags);
-}
-
-void x86_pmu_stop(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
-               x86_pmu.disable(event);
-               cpuc->events[hwc->idx] = NULL;
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               x86_perf_event_update(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static void x86_pmu_del(struct perf_event *event, int flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int i;
-
-       /*
-        * event is descheduled
-        */
-       event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
-       /*
-        * If we're called during a txn, we don't need to do anything.
-        * The events never got scheduled and ->cancel_txn will truncate
-        * the event_list.
-        *
-        * XXX assumes any ->del() called during a TXN will only be on
-        * an event added during that same TXN.
-        */
-       if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-               return;
-
-       /*
-        * Not a TXN, therefore cleanup properly.
-        */
-       x86_pmu_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < cpuc->n_events; i++) {
-               if (event == cpuc->event_list[i])
-                       break;
-       }
-
-       if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
-               return;
-
-       /* If we have a newly added event; make sure to decrease n_added. */
-       if (i >= cpuc->n_events - cpuc->n_added)
-               --cpuc->n_added;
-
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(cpuc, event);
-
-       /* Delete the array entry. */
-       while (++i < cpuc->n_events) {
-               cpuc->event_list[i-1] = cpuc->event_list[i];
-               cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
-       }
-       --cpuc->n_events;
-
-       perf_event_update_userpage(event);
-}
-
-int x86_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       struct perf_event *event;
-       int idx, handled = 0;
-       u64 val;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * Some chipsets need to unmask the LVTPC in a particular spot
-        * inside the NMI handler.  As a result, the unmasking was pushed
-        * into all the NMI handlers.
-        *
-        * This generic handler doesn't seem to have any issues with where
-        * the unmasking occurs, so it was left at the top.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               if (!test_bit(idx, cpuc->active_mask)) {
-                       /*
-                        * Although we deactivated the counter, some CPUs
-                        * might still deliver spurious interrupts that were
-                        * already in flight. Catch them:
-                        */
-                       if (__test_and_clear_bit(idx, cpuc->running))
-                               handled++;
-                       continue;
-               }
-
-               event = cpuc->events[idx];
-
-               val = x86_perf_event_update(event);
-               if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
-                       continue;
-
-               /*
-                * event overflow
-                */
-               handled++;
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (!x86_perf_event_set_period(event))
-                       continue;
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       return handled;
-}
-
-void perf_events_lapic_init(void)
-{
-       if (!x86_pmu.apic || !x86_pmu_initialized())
-               return;
-
-       /*
-        * Always use NMI for PMU
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-}
-
-static int
-perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
-       u64 start_clock;
-       u64 finish_clock;
-       int ret;
-
-       /*
-        * All PMUs/events that share this PMI handler should make sure to
-        * increment active_events for their events.
-        */
-       if (!atomic_read(&active_events))
-               return NMI_DONE;
-
-       start_clock = sched_clock();
-       ret = x86_pmu.handle_irq(regs);
-       finish_clock = sched_clock();
-
-       perf_sample_event_took(finish_clock - start_clock);
-
-       return ret;
-}
-NOKPROBE_SYMBOL(perf_event_nmi_handler);
-
-struct event_constraint emptyconstraint;
-struct event_constraint unconstrained;
-
-static int
-x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int i, ret = NOTIFY_OK;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
-                       cpuc->kfree_on_online[i] = NULL;
-               if (x86_pmu.cpu_prepare)
-                       ret = x86_pmu.cpu_prepare(cpu);
-               break;
-
-       case CPU_STARTING:
-               if (x86_pmu.cpu_starting)
-                       x86_pmu.cpu_starting(cpu);
-               break;
-
-       case CPU_ONLINE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
-                       kfree(cpuc->kfree_on_online[i]);
-                       cpuc->kfree_on_online[i] = NULL;
-               }
-               break;
-
-       case CPU_DYING:
-               if (x86_pmu.cpu_dying)
-                       x86_pmu.cpu_dying(cpu);
-               break;
-
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               if (x86_pmu.cpu_dead)
-                       x86_pmu.cpu_dead(cpu);
-               break;
-
-       default:
-               break;
-       }
-
-       return ret;
-}
-
-static void __init pmu_check_apic(void)
-{
-       if (cpu_has_apic)
-               return;
-
-       x86_pmu.apic = 0;
-       pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
-       pr_info("no hardware sampling interrupt available.\n");
-
-       /*
-        * If we have a PMU initialized but no APIC
-        * interrupts, we cannot sample hardware
-        * events (user-space has to fall back and
-        * sample via an hrtimer-based software event):
-        */
-       pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
-}
-
-static struct attribute_group x86_pmu_format_group = {
-       .name = "format",
-       .attrs = NULL,
-};
-
-/*
- * Remove all undefined events (x86_pmu.event_map(id) == 0)
- * out of events_attr attributes.
- */
-static void __init filter_events(struct attribute **attrs)
-{
-       struct device_attribute *d;
-       struct perf_pmu_events_attr *pmu_attr;
-       int offset = 0;
-       int i, j;
-
-       for (i = 0; attrs[i]; i++) {
-               d = (struct device_attribute *)attrs[i];
-               pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
-               /* str trumps id */
-               if (pmu_attr->event_str)
-                       continue;
-               if (x86_pmu.event_map(i + offset))
-                       continue;
-
-               for (j = i; attrs[j]; j++)
-                       attrs[j] = attrs[j + 1];
-
-               /* Check the shifted attr. */
-               i--;
-
-               /*
-                        * event_map() is index based; the attrs array is organized
-                        * by increasing event index. If we shift the events, then
-                        * we need to compensate for the event_map(), otherwise
-                        * we are looking up the wrong event in the map.
-                */
-               offset++;
-       }
-}
-
-/* Merge two pointer arrays */
-__init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
-{
-       struct attribute **new;
-       int j, i;
-
-       for (j = 0; a[j]; j++)
-               ;
-       for (i = 0; b[i]; i++)
-               j++;
-       j++;
-
-       new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
-       if (!new)
-               return NULL;
-
-       j = 0;
-       for (i = 0; a[i]; i++)
-               new[j++] = a[i];
-       for (i = 0; b[i]; i++)
-               new[j++] = b[i];
-       new[j] = NULL;
-
-       return new;
-}
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-                         char *page)
-{
-       struct perf_pmu_events_attr *pmu_attr = \
-               container_of(attr, struct perf_pmu_events_attr, attr);
-       u64 config = x86_pmu.event_map(pmu_attr->id);
-
-       /* string trumps id */
-       if (pmu_attr->event_str)
-               return sprintf(page, "%s", pmu_attr->event_str);
-
-       return x86_pmu.events_sysfs_show(page, config);
-}
-
-EVENT_ATTR(cpu-cycles,                 CPU_CYCLES              );
-EVENT_ATTR(instructions,               INSTRUCTIONS            );
-EVENT_ATTR(cache-references,           CACHE_REFERENCES        );
-EVENT_ATTR(cache-misses,               CACHE_MISSES            );
-EVENT_ATTR(branch-instructions,                BRANCH_INSTRUCTIONS     );
-EVENT_ATTR(branch-misses,              BRANCH_MISSES           );
-EVENT_ATTR(bus-cycles,                 BUS_CYCLES              );
-EVENT_ATTR(stalled-cycles-frontend,    STALLED_CYCLES_FRONTEND );
-EVENT_ATTR(stalled-cycles-backend,     STALLED_CYCLES_BACKEND  );
-EVENT_ATTR(ref-cycles,                 REF_CPU_CYCLES          );
-
-static struct attribute *empty_attrs;
-
-static struct attribute *events_attr[] = {
-       EVENT_PTR(CPU_CYCLES),
-       EVENT_PTR(INSTRUCTIONS),
-       EVENT_PTR(CACHE_REFERENCES),
-       EVENT_PTR(CACHE_MISSES),
-       EVENT_PTR(BRANCH_INSTRUCTIONS),
-       EVENT_PTR(BRANCH_MISSES),
-       EVENT_PTR(BUS_CYCLES),
-       EVENT_PTR(STALLED_CYCLES_FRONTEND),
-       EVENT_PTR(STALLED_CYCLES_BACKEND),
-       EVENT_PTR(REF_CPU_CYCLES),
-       NULL,
-};
-
-static struct attribute_group x86_pmu_events_group = {
-       .name = "events",
-       .attrs = events_attr,
-};
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
-{
-       u64 umask  = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-       u64 cmask  = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
-       bool edge  = (config & ARCH_PERFMON_EVENTSEL_EDGE);
-       bool pc    = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
-       bool any   = (config & ARCH_PERFMON_EVENTSEL_ANY);
-       bool inv   = (config & ARCH_PERFMON_EVENTSEL_INV);
-       ssize_t ret;
-
-       /*
-        * We have a whole page to spend and just a little data
-        * to write, so we can safely use sprintf().
-        */
-       ret = sprintf(page, "event=0x%02llx", event);
-
-       if (umask)
-               ret += sprintf(page + ret, ",umask=0x%02llx", umask);
-
-       if (edge)
-               ret += sprintf(page + ret, ",edge");
-
-       if (pc)
-               ret += sprintf(page + ret, ",pc");
-
-       if (any)
-               ret += sprintf(page + ret, ",any");
-
-       if (inv)
-               ret += sprintf(page + ret, ",inv");
-
-       if (cmask)
-               ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
-
-       ret += sprintf(page + ret, "\n");
-
-       return ret;
-}
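-
-/*
- * For example (illustrative values), event = 0x2e with a config carrying
- * umask 0x4f (i.e. config = 0x4f2e) would be rendered as:
- *
- *     event=0x2e,umask=0x4f
- */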
-
-static int __init init_hw_perf_events(void)
-{
-       struct x86_pmu_quirk *quirk;
-       int err;
-
-       pr_info("Performance Events: ");
-
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_INTEL:
-               err = intel_pmu_init();
-               break;
-       case X86_VENDOR_AMD:
-               err = amd_pmu_init();
-               break;
-       default:
-               err = -ENOTSUPP;
-       }
-       if (err != 0) {
-               pr_cont("no PMU driver, software events only.\n");
-               return 0;
-       }
-
-       pmu_check_apic();
-
-       /* sanity check that the hardware exists or is emulated */
-       if (!check_hw_exists())
-               return 0;
-
-       pr_cont("%s PMU driver.\n", x86_pmu.name);
-
-       x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
-
-       for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
-               quirk->func();
-
-       if (!x86_pmu.intel_ctrl)
-               x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
-       perf_events_lapic_init();
-       register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
-
-       unconstrained = (struct event_constraint)
-               __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-                                  0, x86_pmu.num_counters, 0, 0);
-
-       x86_pmu_format_group.attrs = x86_pmu.format_attrs;
-
-       if (x86_pmu.event_attrs)
-               x86_pmu_events_group.attrs = x86_pmu.event_attrs;
-
-       if (!x86_pmu.events_sysfs_show)
-               x86_pmu_events_group.attrs = &empty_attrs;
-       else
-               filter_events(x86_pmu_events_group.attrs);
-
-       if (x86_pmu.cpu_events) {
-               struct attribute **tmp;
-
-               tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
-               if (!WARN_ON(!tmp))
-                       x86_pmu_events_group.attrs = tmp;
-       }
-
-       pr_info("... version:                %d\n",     x86_pmu.version);
-       pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
-       pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
-       pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
-       pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-       pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
-       pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
-
-       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-       perf_cpu_notifier(x86_pmu_notifier);
-
-       return 0;
-}
-early_initcall(init_hw_perf_events);
-
-static inline void x86_pmu_read(struct perf_event *event)
-{
-       x86_perf_event_update(event);
-}
-
-/*
- * Start group events scheduling transaction
- * Set the flag to make pmu::enable() not perform the
- * schedulability test; it will be performed at commit time.
- *
- * We only support PERF_PMU_TXN_ADD transactions. Save the
- * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
- * transactions.
- */
-static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       WARN_ON_ONCE(cpuc->txn_flags);          /* txn already in flight */
-
-       cpuc->txn_flags = txn_flags;
-       if (txn_flags & ~PERF_PMU_TXN_ADD)
-               return;
-
-       perf_pmu_disable(pmu);
-       __this_cpu_write(cpu_hw_events.n_txn, 0);
-}
-
-/*
- * Stop group events scheduling transaction
- * Clear the flag and pmu::enable() will perform the
- * schedulability test.
- */
-static void x86_pmu_cancel_txn(struct pmu *pmu)
-{
-       unsigned int txn_flags;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
-       txn_flags = cpuc->txn_flags;
-       cpuc->txn_flags = 0;
-       if (txn_flags & ~PERF_PMU_TXN_ADD)
-               return;
-
-       /*
-        * Truncate collected array by the number of events added in this
-        * transaction. See x86_pmu_add() and x86_pmu_*_txn().
-        */
-       __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
-       __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
-       perf_pmu_enable(pmu);
-}
-
-/*
- * Commit group events scheduling transaction
- * Perform the group schedulability test as a whole.
- * Return 0 on success.
- *
- * Does not cancel the transaction on failure; expects the caller to do this.
- */
-static int x86_pmu_commit_txn(struct pmu *pmu)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int assign[X86_PMC_IDX_MAX];
-       int n, ret;
-
-       WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
-
-       if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
-               cpuc->txn_flags = 0;
-               return 0;
-       }
-
-       n = cpuc->n_events;
-
-       if (!x86_pmu_initialized())
-               return -EAGAIN;
-
-       ret = x86_pmu.schedule_events(cpuc, n, assign);
-       if (ret)
-               return ret;
-
-       /*
-        * Copy the new assignment now that we know it is possible;
-        * it will be used by hw_perf_enable().
-        */
-       memcpy(cpuc->assign, assign, n*sizeof(int));
-
-       cpuc->txn_flags = 0;
-       perf_pmu_enable(pmu);
-       return 0;
-}
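-
-/*
- * Minimal sketch of how the perf core drives these hooks when scheduling
- * an event group (simplified; error handling and locking omitted):
- *
- *     pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
- *     for each event in the group
- *             pmu->add(event, flags);
- *     if (pmu->commit_txn(pmu))
- *             pmu->cancel_txn(pmu);
- */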
-/*
- * A fake_cpuc is used to validate event groups. Due to
- * the extra reg logic, we need to also allocate a fake
- * per_core and per_cpu structure. Otherwise, group events
- * using extra reg may conflict without the kernel being
- * able to catch this when the last event gets added to
- * the group.
- */
-static void free_fake_cpuc(struct cpu_hw_events *cpuc)
-{
-       kfree(cpuc->shared_regs);
-       kfree(cpuc);
-}
-
-static struct cpu_hw_events *allocate_fake_cpuc(void)
-{
-       struct cpu_hw_events *cpuc;
-       int cpu = raw_smp_processor_id();
-
-       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
-       if (!cpuc)
-               return ERR_PTR(-ENOMEM);
-
-       /* only needed if we have extra_regs */
-       if (x86_pmu.extra_regs) {
-               cpuc->shared_regs = allocate_shared_regs(cpu);
-               if (!cpuc->shared_regs)
-                       goto error;
-       }
-       cpuc->is_fake = 1;
-       return cpuc;
-error:
-       free_fake_cpuc(cpuc);
-       return ERR_PTR(-ENOMEM);
-}
-
-/*
- * validate that we can schedule this event
- */
-static int validate_event(struct perf_event *event)
-{
-       struct cpu_hw_events *fake_cpuc;
-       struct event_constraint *c;
-       int ret = 0;
-
-       fake_cpuc = allocate_fake_cpuc();
-       if (IS_ERR(fake_cpuc))
-               return PTR_ERR(fake_cpuc);
-
-       c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
-
-       if (!c || !c->weight)
-               ret = -EINVAL;
-
-       if (x86_pmu.put_event_constraints)
-               x86_pmu.put_event_constraints(fake_cpuc, event);
-
-       free_fake_cpuc(fake_cpuc);
-
-       return ret;
-}
-
-/*
- * validate a single event group
- *
- * validation includes:
- *     - check that events are compatible with each other
- *     - events do not compete for the same counter
- *     - number of events <= number of counters
- *
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int validate_group(struct perf_event *event)
-{
-       struct perf_event *leader = event->group_leader;
-       struct cpu_hw_events *fake_cpuc;
-       int ret = -EINVAL, n;
-
-       fake_cpuc = allocate_fake_cpuc();
-       if (IS_ERR(fake_cpuc))
-               return PTR_ERR(fake_cpuc);
-       /*
-        * The event is not yet connected with its
-        * siblings; therefore we must first collect
-        * the existing siblings, then add the new event,
-        * before we can simulate the scheduling.
-        */
-       n = collect_events(fake_cpuc, leader, true);
-       if (n < 0)
-               goto out;
-
-       fake_cpuc->n_events = n;
-       n = collect_events(fake_cpuc, event, false);
-       if (n < 0)
-               goto out;
-
-       fake_cpuc->n_events = n;
-
-       ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
-
-out:
-       free_fake_cpuc(fake_cpuc);
-       return ret;
-}
-
-static int x86_pmu_event_init(struct perf_event *event)
-{
-       struct pmu *tmp;
-       int err;
-
-       switch (event->attr.type) {
-       case PERF_TYPE_RAW:
-       case PERF_TYPE_HARDWARE:
-       case PERF_TYPE_HW_CACHE:
-               break;
-
-       default:
-               return -ENOENT;
-       }
-
-       err = __x86_pmu_event_init(event);
-       if (!err) {
-               /*
-                * we temporarily connect the event to its PMU
-                * such that validate_group() can classify
-                * it as an x86 event using is_x86_event()
-                */
-               tmp = event->pmu;
-               event->pmu = &pmu;
-
-               if (event->group_leader != event)
-                       err = validate_group(event);
-               else
-                       err = validate_event(event);
-
-               event->pmu = tmp;
-       }
-       if (err) {
-               if (event->destroy)
-                       event->destroy(event);
-       }
-
-       if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
-               event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
-
-       return err;
-}
-
-static void refresh_pce(void *ignored)
-{
-       if (current->mm)
-               load_mm_cr4(current->mm);
-}
-
-static void x86_pmu_event_mapped(struct perf_event *event)
-{
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return;
-
-       if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static void x86_pmu_event_unmapped(struct perf_event *event)
-{
-       if (!current->mm)
-               return;
-
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return;
-
-       if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
-               on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
-}
-
-static int x86_pmu_event_idx(struct perf_event *event)
-{
-       int idx = event->hw.idx;
-
-       if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
-               return 0;
-
-       if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
-               idx -= INTEL_PMC_IDX_FIXED;
-               idx |= 1 << 30;
-       }
-
-       return idx + 1;
-}
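-
-/*
- * The index returned above is what userspace sees in
- * perf_event_mmap_page::index. A minimal self-monitoring read (following
- * the scheme documented for the mmap page; simplified, no seqlock retry)
- * looks roughly like:
- *
- *     idx = pc->index;
- *     if (pc->cap_user_rdpmc && idx)
- *             count = pc->offset + rdpmc(idx - 1);
- */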
-
-static ssize_t get_attr_rdpmc(struct device *cdev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
-}
-
-static ssize_t set_attr_rdpmc(struct device *cdev,
-                             struct device_attribute *attr,
-                             const char *buf, size_t count)
-{
-       unsigned long val;
-       ssize_t ret;
-
-       ret = kstrtoul(buf, 0, &val);
-       if (ret)
-               return ret;
-
-       if (val > 2)
-               return -EINVAL;
-
-       if (x86_pmu.attr_rdpmc_broken)
-               return -ENOTSUPP;
-
-       if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
-               /*
-                * Changing into or out of always available, aka
-                * perf-event-bypassing mode.  This path is extremely slow,
-                * but only root can trigger it, so it's okay.
-                */
-               if (val == 2)
-                       static_key_slow_inc(&rdpmc_always_available);
-               else
-                       static_key_slow_dec(&rdpmc_always_available);
-               on_each_cpu(refresh_pce, NULL, 1);
-       }
-
-       x86_pmu.attr_rdpmc = val;
-
-       return count;
-}
-
-static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
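-
-/*
- * This attribute is typically exposed as /sys/devices/cpu/rdpmc (the exact
- * path may vary by kernel version). The accepted values mirror the logic
- * above:
- *
- *     0 - RDPMC from userspace is disabled
- *     1 - RDPMC is allowed for tasks that have a perf event mapped
- *     2 - RDPMC is always allowed (perf-event-bypassing mode)
- */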
-
-static struct attribute *x86_pmu_attrs[] = {
-       &dev_attr_rdpmc.attr,
-       NULL,
-};
-
-static struct attribute_group x86_pmu_attr_group = {
-       .attrs = x86_pmu_attrs,
-};
-
-static const struct attribute_group *x86_pmu_attr_groups[] = {
-       &x86_pmu_attr_group,
-       &x86_pmu_format_group,
-       &x86_pmu_events_group,
-       NULL,
-};
-
-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       if (x86_pmu.sched_task)
-               x86_pmu.sched_task(ctx, sched_in);
-}
-
-void perf_check_microcode(void)
-{
-       if (x86_pmu.check_microcode)
-               x86_pmu.check_microcode();
-}
-EXPORT_SYMBOL_GPL(perf_check_microcode);
-
-static struct pmu pmu = {
-       .pmu_enable             = x86_pmu_enable,
-       .pmu_disable            = x86_pmu_disable,
-
-       .attr_groups            = x86_pmu_attr_groups,
-
-       .event_init             = x86_pmu_event_init,
-
-       .event_mapped           = x86_pmu_event_mapped,
-       .event_unmapped         = x86_pmu_event_unmapped,
-
-       .add                    = x86_pmu_add,
-       .del                    = x86_pmu_del,
-       .start                  = x86_pmu_start,
-       .stop                   = x86_pmu_stop,
-       .read                   = x86_pmu_read,
-
-       .start_txn              = x86_pmu_start_txn,
-       .cancel_txn             = x86_pmu_cancel_txn,
-       .commit_txn             = x86_pmu_commit_txn,
-
-       .event_idx              = x86_pmu_event_idx,
-       .sched_task             = x86_pmu_sched_task,
-       .task_ctx_size          = sizeof(struct x86_perf_task_context),
-};
-
-void arch_perf_update_userpage(struct perf_event *event,
-                              struct perf_event_mmap_page *userpg, u64 now)
-{
-       struct cyc2ns_data *data;
-
-       userpg->cap_user_time = 0;
-       userpg->cap_user_time_zero = 0;
-       userpg->cap_user_rdpmc =
-               !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
-       userpg->pmc_width = x86_pmu.cntval_bits;
-
-       if (!sched_clock_stable())
-               return;
-
-       data = cyc2ns_read_begin();
-
-       /*
-        * Internal timekeeping for enabled/running/stopped times
-        * is always in the local_clock domain.
-        */
-       userpg->cap_user_time = 1;
-       userpg->time_mult = data->cyc2ns_mul;
-       userpg->time_shift = data->cyc2ns_shift;
-       userpg->time_offset = data->cyc2ns_offset - now;
-
-       /*
-        * cap_user_time_zero doesn't make sense when we're using a different
-        * time base for the records.
-        */
-       if (event->clock == &local_clock) {
-               userpg->cap_user_time_zero = 1;
-               userpg->time_zero = data->cyc2ns_offset;
-       }
-
-       cyc2ns_read_end(data);
-}
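-
-/*
- * With cap_user_time set, userspace can convert a TSC value 'cyc' into the
- * perf time base using the fields filled in above (this follows the
- * conversion documented for perf_event_mmap_page; simplified):
- *
- *     quot  = cyc >> time_shift;
- *     rem   = cyc & (((u64)1 << time_shift) - 1);
- *     delta = time_offset + quot * time_mult +
- *             ((rem * time_mult) >> time_shift);
- */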
-
-/*
- * callchain support
- */
-
-static int backtrace_stack(void *data, char *name)
-{
-       return 0;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
-       struct perf_callchain_entry *entry = data;
-
-       perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-       .stack                  = backtrace_stack,
-       .address                = backtrace_address,
-       .walk_stack             = print_context_stack_bp,
-};
-
-void
-perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest OS callchains yet */
-               return;
-       }
-
-       perf_callchain_store(entry, regs->ip);
-
-       dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
-}
-
-static inline int
-valid_user_frame(const void __user *fp, unsigned long size)
-{
-       return (__range_not_ok(fp, size, TASK_SIZE) == 0);
-}
-
-static unsigned long get_segment_base(unsigned int segment)
-{
-       struct desc_struct *desc;
-       int idx = segment >> 3;
-
-       if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-               struct ldt_struct *ldt;
-
-               if (idx > LDT_ENTRIES)
-                       return 0;
-
-               /* IRQs are off, so this synchronizes with smp_store_release */
-               ldt = lockless_dereference(current->active_mm->context.ldt);
-               if (!ldt || idx > ldt->size)
-                       return 0;
-
-               desc = &ldt->entries[idx];
-#else
-               return 0;
-#endif
-       } else {
-               if (idx > GDT_ENTRIES)
-                       return 0;
-
-               desc = raw_cpu_ptr(gdt_page.gdt) + idx;
-       }
-
-       return get_desc_base(desc);
-}
-
-#ifdef CONFIG_IA32_EMULATION
-
-#include <asm/compat.h>
-
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-       /* 32-bit process in 64-bit kernel. */
-       unsigned long ss_base, cs_base;
-       struct stack_frame_ia32 frame;
-       const void __user *fp;
-
-       if (!test_thread_flag(TIF_IA32))
-               return 0;
-
-       cs_base = get_segment_base(regs->cs);
-       ss_base = get_segment_base(regs->ss);
-
-       fp = compat_ptr(ss_base + regs->bp);
-       pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
-               unsigned long bytes;
-               frame.next_frame     = 0;
-               frame.return_address = 0;
-
-               if (!access_ok(VERIFY_READ, fp, 8))
-                       break;
-
-               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
-               if (bytes != 0)
-                       break;
-               bytes = __copy_from_user_nmi(&frame.return_address, fp+4, 4);
-               if (bytes != 0)
-                       break;
-
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
-               perf_callchain_store(entry, cs_base + frame.return_address);
-               fp = compat_ptr(ss_base + frame.next_frame);
-       }
-       pagefault_enable();
-       return 1;
-}
-#else
-static inline int
-perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-    return 0;
-}
-#endif
-
-void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
-{
-       struct stack_frame frame;
-       const void __user *fp;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest OS callchains yet */
-               return;
-       }
-
-       /*
-        * We don't know what to do with VM86 stacks; ignore them for now.
-        */
-       if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
-               return;
-
-       fp = (void __user *)regs->bp;
-
-       perf_callchain_store(entry, regs->ip);
-
-       if (!current->mm)
-               return;
-
-       if (perf_callchain_user32(regs, entry))
-               return;
-
-       pagefault_disable();
-       while (entry->nr < PERF_MAX_STACK_DEPTH) {
-               unsigned long bytes;
-               frame.next_frame             = NULL;
-               frame.return_address = 0;
-
-               if (!access_ok(VERIFY_READ, fp, 16))
-                       break;
-
-               bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8);
-               if (bytes != 0)
-                       break;
-               bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8);
-               if (bytes != 0)
-                       break;
-
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
-               perf_callchain_store(entry, frame.return_address);
-               fp = (void __user *)frame.next_frame;
-       }
-       pagefault_enable();
-}
-
-/*
- * Deal with code segment offsets for the various execution modes:
- *
- *   VM86 - the good olde 16 bit days, where the linear address is
- *          20 bits and we use regs->ip + 0x10 * regs->cs.
- *
- *   IA32 - Where we need to look at GDT/LDT segment descriptor tables
- *          to figure out what the 32bit base address is.
- *
- *    X32 - has TIF_X32 set, but is running in x86_64
- *
- * X86_64 - CS,DS,SS,ES are all zero based.
- */
-static unsigned long code_segment_base(struct pt_regs *regs)
-{
-       /*
-        * For IA32 we look at the GDT/LDT segment base to convert the
-        * effective IP to a linear address.
-        */
-
-#ifdef CONFIG_X86_32
-       /*
-        * If we are in VM86 mode, add the segment offset to convert to a
-        * linear address.
-        */
-       if (regs->flags & X86_VM_MASK)
-               return 0x10 * regs->cs;
-
-       if (user_mode(regs) && regs->cs != __USER_CS)
-               return get_segment_base(regs->cs);
-#else
-       if (user_mode(regs) && !user_64bit_mode(regs) &&
-           regs->cs != __USER32_CS)
-               return get_segment_base(regs->cs);
-#endif
-       return 0;
-}
-
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
-{
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
-               return perf_guest_cbs->get_guest_ip();
-
-       return regs->ip + code_segment_base(regs);
-}
-
-unsigned long perf_misc_flags(struct pt_regs *regs)
-{
-       int misc = 0;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               if (perf_guest_cbs->is_user_mode())
-                       misc |= PERF_RECORD_MISC_GUEST_USER;
-               else
-                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
-       } else {
-               if (user_mode(regs))
-                       misc |= PERF_RECORD_MISC_USER;
-               else
-                       misc |= PERF_RECORD_MISC_KERNEL;
-       }
-
-       if (regs->flags & PERF_EFLAGS_EXACT)
-               misc |= PERF_RECORD_MISC_EXACT_IP;
-
-       return misc;
-}
-
-void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
-{
-       cap->version            = x86_pmu.version;
-       cap->num_counters_gp    = x86_pmu.num_counters;
-       cap->num_counters_fixed = x86_pmu.num_counters_fixed;
-       cap->bit_width_gp       = x86_pmu.cntval_bits;
-       cap->bit_width_fixed    = x86_pmu.cntval_bits;
-       cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
-       cap->events_mask_len    = x86_pmu.events_mask_len;
-}
-EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
deleted file mode 100644 (file)
index 7bb61e3..0000000
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- * Performance events x86 architecture header
- *
- *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2009 Jaswinder Singh Rajput
- *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- *  Copyright (C) 2009 Google, Inc., Stephane Eranian
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-/* To enable MSR tracing please use the generic trace points. */
-
-/*
- *          |   NHM/WSM    |      SNB     |
- * register -------------------------------
- *          |  HT  | no HT |  HT  | no HT |
- *-----------------------------------------
- * offcore  | core | core  | cpu  | core  |
- * lbr_sel  | core | core  | cpu  | core  |
- * ld_lat   | cpu  | core  | cpu  | core  |
- *-----------------------------------------
- *
- * Given that there is a small number of shared regs,
- * we can pre-allocate their slots in the per-cpu
- * per-core reg tables.
- */
-enum extra_reg_type {
-       EXTRA_REG_NONE  = -1,   /* not used */
-
-       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
-       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
-       EXTRA_REG_LBR   = 2,    /* lbr_select */
-       EXTRA_REG_LDLAT = 3,    /* ld_lat_threshold */
-       EXTRA_REG_FE    = 4,    /* fe_* */
-
-       EXTRA_REG_MAX           /* number of entries needed */
-};
-
-struct event_constraint {
-       union {
-               unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-               u64             idxmsk64;
-       };
-       u64     code;
-       u64     cmask;
-       int     weight;
-       int     overlap;
-       int     flags;
-};
-/*
- * struct hw_perf_event.flags flags
- */
-#define PERF_X86_EVENT_PEBS_LDLAT      0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST         0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW     0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED       0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW     0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW     0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL            0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC         0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED   0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT       0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD     0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING     0x0800 /* use freerunning PEBS */
-
-
-struct amd_nb {
-       int nb_id;  /* NorthBridge id */
-       int refcnt; /* reference count */
-       struct perf_event *owners[X86_PMC_IDX_MAX];
-       struct event_constraint event_constraints[X86_PMC_IDX_MAX];
-};
-
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS                8
-
-/*
- * Flags PEBS can handle without a PMI.
- *
- * TID can only be handled by flushing at context switch.
- */
-#define PEBS_FREERUNNING_FLAGS \
-       (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
-       PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
-       PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
-       PERF_SAMPLE_TRANSACTION)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64-bit fields.
- */
-struct debug_store {
-       u64     bts_buffer_base;
-       u64     bts_index;
-       u64     bts_absolute_maximum;
-       u64     bts_interrupt_threshold;
-       u64     pebs_buffer_base;
-       u64     pebs_index;
-       u64     pebs_absolute_maximum;
-       u64     pebs_interrupt_threshold;
-       u64     pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
-/*
- * Per register state.
- */
-struct er_account {
-       raw_spinlock_t          lock;   /* per-core: protect structure */
-       u64                 config;     /* extra MSR config */
-       u64                 reg;        /* extra MSR number */
-       atomic_t            ref;        /* reference count */
-};
-
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-struct intel_shared_regs {
-       struct er_account       regs[EXTRA_REG_MAX];
-       int                     refcnt;         /* per-core: #HT threads */
-       unsigned                core_id;        /* per-core: core id */
-};
-
-enum intel_excl_state_type {
-       INTEL_EXCL_UNUSED    = 0, /* counter is unused */
-       INTEL_EXCL_SHARED    = 1, /* counter can be used by both threads */
-       INTEL_EXCL_EXCLUSIVE = 2, /* counter can be used by one thread only */
-};
-
-struct intel_excl_states {
-       enum intel_excl_state_type state[X86_PMC_IDX_MAX];
-       bool sched_started; /* true if scheduling has started */
-};
-
-struct intel_excl_cntrs {
-       raw_spinlock_t  lock;
-
-       struct intel_excl_states states[2];
-
-       union {
-               u16     has_exclusive[2];
-               u32     exclusive_present;
-       };
-
-       int             refcnt;         /* per-core: #HT threads */
-       unsigned        core_id;        /* per-core: core id */
-};
-
-#define MAX_LBR_ENTRIES                32
-
-enum {
-       X86_PERF_KFREE_SHARED = 0,
-       X86_PERF_KFREE_EXCL   = 1,
-       X86_PERF_KFREE_MAX
-};
-
-struct cpu_hw_events {
-       /*
-        * Generic x86 PMC bits
-        */
-       struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
-       unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long           running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       int                     enabled;
-
-       int                     n_events; /* the # of events in the below arrays */
-       int                     n_added;  /* the # of events added last in the below arrays;
-                                            they've never been enabled yet */
-       int                     n_txn;    /* the # of events added last in the below arrays;
-                                            added in the current transaction */
-       int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
-       u64                     tags[X86_PMC_IDX_MAX];
-
-       struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
-       struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
-
-       int                     n_excl; /* the number of exclusive events */
-
-       unsigned int            txn_flags;
-       int                     is_fake;
-
-       /*
-        * Intel DebugStore bits
-        */
-       struct debug_store      *ds;
-       u64                     pebs_enabled;
-
-       /*
-        * Intel LBR bits
-        */
-       int                             lbr_users;
-       void                            *lbr_context;
-       struct perf_branch_stack        lbr_stack;
-       struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
-       struct er_account               *lbr_sel;
-       u64                             br_sel;
-
-       /*
-        * Intel host/guest exclude bits
-        */
-       u64                             intel_ctrl_guest_mask;
-       u64                             intel_ctrl_host_mask;
-       struct perf_guest_switch_msr    guest_switch_msrs[X86_PMC_IDX_MAX];
-
-       /*
-        * Intel checkpoint mask
-        */
-       u64                             intel_cp_status;
-
-       /*
-        * manage shared (per-core, per-cpu) registers
-        * used on Intel NHM/WSM/SNB
-        */
-       struct intel_shared_regs        *shared_regs;
-       /*
-        * manage exclusive counter access between hyperthreads
-        */
-       struct event_constraint *constraint_list; /* in enable order */
-       struct intel_excl_cntrs         *excl_cntrs;
-       int excl_thread_id; /* 0 or 1 */
-
-       /*
-        * AMD specific bits
-        */
-       struct amd_nb                   *amd_nb;
-       /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
-       u64                             perf_ctr_virt_mask;
-
-       void                            *kfree_on_online[X86_PERF_KFREE_MAX];
-};
-
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
-       { .idxmsk64 = (n) },            \
-       .code = (c),                    \
-       .cmask = (m),                   \
-       .weight = (w),                  \
-       .overlap = (o),                 \
-       .flags = f,                     \
-}
-
-#define EVENT_CONSTRAINT(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
-
-#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
-       __EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
-                          0, PERF_X86_EVENT_EXCL)
-
-/*
- * The overlap flag marks event constraints with overlapping counter
- * masks. This is the case if the counter mask of such an event is not
- * a subset of any other counter mask of a constraint with an equal or
- * higher weight, e.g.:
- *
- *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
- *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
- *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
- *
- * The event scheduler may not select the correct counter in the first
- * cycle because it needs to know which subsequent events will be
- * scheduled. It may fail to schedule the events then. So we set the
- * overlap flag for such constraints to give the scheduler a hint which
- * events to select for counter rescheduling.
- *
- * Care must be taken as the rescheduling algorithm is O(n!) which
- * will increase scheduling cycles for an over-committed system
- * dramatically.  The number of such EVENT_CONSTRAINT_OVERLAP() macros
- * and its counter masks must be kept at a minimum.
- */
-#define EVENT_CONSTRAINT_OVERLAP(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0)
-
-/*
- * Constraint on the Event code.
- */
-#define INTEL_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
-
-/*
- * Constraint on the Event code + UMask + fixed-mask
- *
- * Filter mask to validate fixed counter events.
- * The following filters disqualify for fixed counters:
- *  - inv
- *  - edge
- *  - cnt-mask
- *  - in_tx
- *  - in_tx_checkpointed
- *  The other filters are supported by fixed counters.
- *  The any-thread option is supported starting with v3.
- */
-#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
-#define FIXED_EVENT_CONSTRAINT(c, n)   \
-       EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
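-
-/*
- * For instance, the fixed counter for retired instructions (architectural
- * event code 0x00c0 on fixed counter 0) would be described as:
- *
- *     FIXED_EVENT_CONSTRAINT(0x00c0, 0)
- */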
-
-/*
- * Constraint on the Event code + UMask
- */
-#define INTEL_UEVENT_CONSTRAINT(c, n)  \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-
-/* Constraint on specific umask bit only + event */
-#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)      \
-       EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))
-
-/* Like UEVENT_CONSTRAINT, but match flags too */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)    \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-#define INTEL_EXCLUEVT_CONSTRAINT(c, n)        \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
-                          HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
-
-#define INTEL_PLD_CONSTRAINT(c, n)     \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                          HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
-
-#define INTEL_PST_CONSTRAINT(c, n)     \
-       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
-
-/* Event constraint, but match on all event flags too. */
-#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
-       EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
-
-/* Check only flags, but allow all event/umask */
-#define INTEL_ALL_EVENT_CONSTRAINT(code, n)    \
-       EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
-
-/* Check flags and event code, and set the HSW store flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-/* Check flags and event code, and set the HSW load flag */
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW store flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW load flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
-
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, \
-                         PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)
-
-/* Check flags and event code/umask, and set the HSW N/A flag */
-#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
-       __EVENT_CONSTRAINT(code, n,                     \
-                         INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
-                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
-
-
-/*
- * We define the end marker as having a weight of -1
- * to enable blacklisting of events using a counter bitmask
- * of zero and thus a weight of zero.
- * The end marker has a weight that cannot possibly be
- * obtained from counting the bits in the bitmask.
- */
-#define EVENT_CONSTRAINT_END { .weight = -1 }
-
-/*
- * Check for end marker with weight == -1
- */
-#define for_each_event_constraint(e, c)        \
-       for ((e) = (c); (e)->weight != -1; (e)++)
-
-/*
- * Extra registers for specific events.
- *
- * Some events need large masks and require external MSRs.
- * Those extra MSRs end up being shared for all events on
- * a PMU and sometimes between PMUs of sibling HT threads.
- * In either case, the kernel needs to handle conflicting
- * accesses to those extra, shared, regs. The data structure
- * to manage those registers is stored in cpu_hw_event.
- */
-struct extra_reg {
-       unsigned int            event;
-       unsigned int            msr;
-       u64                     config_mask;
-       u64                     valid_mask;
-       int                     idx;  /* per_xxx->regs[] reg index */
-       bool                    extra_msr_access;
-};
-
-#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
-       .event = (e),                   \
-       .msr = (ms),                    \
-       .config_mask = (m),             \
-       .valid_mask = (vm),             \
-       .idx = EXTRA_REG_##i,           \
-       .extra_msr_access = true,       \
-       }
-
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
-
-#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
-                       ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)
-
-#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \
-       INTEL_UEVENT_EXTRA_REG(c, \
-                              MSR_PEBS_LD_LAT_THRESHOLD, \
-                              0xffff, \
-                              LDLAT)
-
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
-
-union perf_capabilities {
-       struct {
-               u64     lbr_format:6;
-               u64     pebs_trap:1;
-               u64     pebs_arch_reg:1;
-               u64     pebs_format:4;
-               u64     smm_freeze:1;
-               /*
-                * PMU supports separate counter range for writing
-                * values > 32bit.
-                */
-               u64     full_width_write:1;
-       };
-       u64     capabilities;
-};
-
-struct x86_pmu_quirk {
-       struct x86_pmu_quirk *next;
-       void (*func)(void);
-};
-
-union x86_pmu_config {
-       struct {
-               u64 event:8,
-                   umask:8,
-                   usr:1,
-                   os:1,
-                   edge:1,
-                   pc:1,
-                   interrupt:1,
-                   __reserved1:1,
-                   en:1,
-                   inv:1,
-                   cmask:8,
-                   event2:4,
-                   __reserved2:4,
-                   go:1,
-                   ho:1;
-       } bits;
-       u64 value;
-};
-
-#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
-
-enum {
-       x86_lbr_exclusive_lbr,
-       x86_lbr_exclusive_bts,
-       x86_lbr_exclusive_pt,
-       x86_lbr_exclusive_max,
-};
-
-/*
- * struct x86_pmu - generic x86 pmu
- */
-struct x86_pmu {
-       /*
-        * Generic x86 PMC bits
-        */
-       const char      *name;
-       int             version;
-       int             (*handle_irq)(struct pt_regs *);
-       void            (*disable_all)(void);
-       void            (*enable_all)(int added);
-       void            (*enable)(struct perf_event *);
-       void            (*disable)(struct perf_event *);
-       int             (*hw_config)(struct perf_event *event);
-       int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
-       unsigned        eventsel;
-       unsigned        perfctr;
-       int             (*addr_offset)(int index, bool eventsel);
-       int             (*rdpmc_index)(int index);
-       u64             (*event_map)(int);
-       int             max_events;
-       int             num_counters;
-       int             num_counters_fixed;
-       int             cntval_bits;
-       u64             cntval_mask;
-       union {
-                       unsigned long events_maskl;
-                       unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
-       };
-       int             events_mask_len;
-       int             apic;
-       u64             max_period;
-       struct event_constraint *
-                       (*get_event_constraints)(struct cpu_hw_events *cpuc,
-                                                int idx,
-                                                struct perf_event *event);
-
-       void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
-                                                struct perf_event *event);
-
-       void            (*start_scheduling)(struct cpu_hw_events *cpuc);
-
-       void            (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
-
-       void            (*stop_scheduling)(struct cpu_hw_events *cpuc);
-
-       struct event_constraint *event_constraints;
-       struct x86_pmu_quirk *quirks;
-       int             perfctr_second_write;
-       bool            late_ack;
-       unsigned        (*limit_period)(struct perf_event *event, unsigned l);
-
-       /*
-        * sysfs attrs
-        */
-       int             attr_rdpmc_broken;
-       int             attr_rdpmc;
-       struct attribute **format_attrs;
-       struct attribute **event_attrs;
-
-       ssize_t         (*events_sysfs_show)(char *page, u64 config);
-       struct attribute **cpu_events;
-
-       /*
-        * CPU Hotplug hooks
-        */
-       int             (*cpu_prepare)(int cpu);
-       void            (*cpu_starting)(int cpu);
-       void            (*cpu_dying)(int cpu);
-       void            (*cpu_dead)(int cpu);
-
-       void            (*check_microcode)(void);
-       void            (*sched_task)(struct perf_event_context *ctx,
-                                     bool sched_in);
-
-       /*
-        * Intel Arch Perfmon v2+
-        */
-       u64                     intel_ctrl;
-       union perf_capabilities intel_cap;
-
-       /*
-        * Intel DebugStore bits
-        */
-       unsigned int    bts             :1,
-                       bts_active      :1,
-                       pebs            :1,
-                       pebs_active     :1,
-                       pebs_broken     :1,
-                       pebs_prec_dist  :1;
-       int             pebs_record_size;
-       void            (*drain_pebs)(struct pt_regs *regs);
-       struct event_constraint *pebs_constraints;
-       void            (*pebs_aliases)(struct perf_event *event);
-       int             max_pebs_events;
-       unsigned long   free_running_flags;
-
-       /*
-        * Intel LBR
-        */
-       unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-       int             lbr_nr;                    /* hardware stack size */
-       u64             lbr_sel_mask;              /* LBR_SELECT valid bits */
-       const int       *lbr_sel_map;              /* lbr_select mappings */
-       bool            lbr_double_abort;          /* duplicated lbr aborts */
-
-       /*
-        * Intel PT/LBR/BTS are exclusive
-        */
-       atomic_t        lbr_exclusive[x86_lbr_exclusive_max];
-
-       /*
-        * Extra registers for events
-        */
-       struct extra_reg *extra_regs;
-       unsigned int flags;
-
-       /*
-        * Intel host/guest support (KVM)
-        */
-       struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
-};
-
-struct x86_perf_task_context {
-       u64 lbr_from[MAX_LBR_ENTRIES];
-       u64 lbr_to[MAX_LBR_ENTRIES];
-       u64 lbr_info[MAX_LBR_ENTRIES];
-       int tos;
-       int lbr_callstack_users;
-       int lbr_stack_state;
-};
-
-#define x86_add_quirk(func_)                                           \
-do {                                                                   \
-       static struct x86_pmu_quirk __quirk __initdata = {              \
-               .func = func_,                                          \
-       };                                                              \
-       __quirk.next = x86_pmu.quirks;                                  \
-       x86_pmu.quirks = &__quirk;                                      \
-} while (0)
-
-/*
- * x86_pmu flags
- */
-#define PMU_FL_NO_HT_SHARING   0x1 /* no hyper-threading resource sharing */
-#define PMU_FL_HAS_RSP_1       0x2 /* has 2 equivalent offcore_rsp regs   */
-#define PMU_FL_EXCL_CNTRS      0x4 /* has exclusive counter requirements  */
-#define PMU_FL_EXCL_ENABLED    0x8 /* exclusive counter active */
-
-#define EVENT_VAR(_id)  event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
-
-#define EVENT_ATTR(_name, _id)                                         \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = {                  \
-       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
-       .id             = PERF_COUNT_HW_##_id,                          \
-       .event_str      = NULL,                                         \
-};
-
-#define EVENT_ATTR_STR(_name, v, str)                                  \
-static struct perf_pmu_events_attr event_attr_##v = {                  \
-       .attr           = __ATTR(_name, 0444, events_sysfs_show, NULL), \
-       .id             = 0,                                            \
-       .event_str      = str,                                          \
-};
-
-extern struct x86_pmu x86_pmu __read_mostly;
-
-static inline bool x86_pmu_has_lbr_callstack(void)
-{
-       return  x86_pmu.lbr_sel_map &&
-               x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
-}
-
-DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-int x86_perf_event_set_period(struct perf_event *event);
-
-/*
- * Generalized hw caching related hw_event table, filled
- * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'hw_event makes no sense on
- * this CPU', any other value means the raw hw_event
- * ID.
- */
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-extern u64 __read_mostly hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-extern u64 __read_mostly hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
-u64 x86_perf_event_update(struct perf_event *event);
-
-static inline unsigned int x86_pmu_config_addr(int index)
-{
-       return x86_pmu.eventsel + (x86_pmu.addr_offset ?
-                                  x86_pmu.addr_offset(index, true) : index);
-}
-
-static inline unsigned int x86_pmu_event_addr(int index)
-{
-       return x86_pmu.perfctr + (x86_pmu.addr_offset ?
-                                 x86_pmu.addr_offset(index, false) : index);
-}
-
-static inline int x86_pmu_rdpmc_index(int index)
-{
-       return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
-}
-
-int x86_add_exclusive(unsigned int what);
-
-void x86_del_exclusive(unsigned int what);
-
-int x86_reserve_hardware(void);
-
-void x86_release_hardware(void);
-
-void hw_perf_lbr_event_destroy(struct perf_event *event);
-
-int x86_setup_perfctr(struct perf_event *event);
-
-int x86_pmu_hw_config(struct perf_event *event);
-
-void x86_pmu_disable_all(void);
-
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
-                                         u64 enable_mask)
-{
-       u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
-
-       if (hwc->extra_reg.reg)
-               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
-       wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
-}
-
-void x86_pmu_enable_all(int added);
-
-int perf_assign_events(struct event_constraint **constraints, int n,
-                       int wmin, int wmax, int gpmax, int *assign);
-int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
-
-void x86_pmu_stop(struct perf_event *event, int flags);
-
-static inline void x86_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-}
-
-void x86_pmu_enable_event(struct perf_event *event);
-
-int x86_pmu_handle_irq(struct pt_regs *regs);
-
-extern struct event_constraint emptyconstraint;
-
-extern struct event_constraint unconstrained;
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-       return ip > PAGE_OFFSET;
-#else
-       return (long)ip < 0;
-#endif
-}
-
-/*
- * Not all PMUs provide the right context information to place the reported IP
- * into full context. Specifically segment registers are typically not
- * supplied.
- *
- * Assuming the address is a linear address (it is for IBS), we fake the CS and
- * vm86 mode using the known zero-based code segment and 'fix up' the registers
- * to reflect this.
- *
- * Intel PEBS/LBR appear to typically provide the effective address, nothing
- * much we can do about that but pray and treat it like a linear address.
- */
-static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
-{
-       regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
-       if (regs->flags & X86_VM_MASK)
-               regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
-       regs->ip = ip;
-}
-
-ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
-ssize_t intel_event_sysfs_show(char *page, u64 config);
-
-struct attribute **merge_attr(struct attribute **a, struct attribute **b);
-
-#ifdef CONFIG_CPU_SUP_AMD
-
-int amd_pmu_init(void);
-
-#else /* CONFIG_CPU_SUP_AMD */
-
-static inline int amd_pmu_init(void)
-{
-       return 0;
-}
-
-#endif /* CONFIG_CPU_SUP_AMD */
-
-#ifdef CONFIG_CPU_SUP_INTEL
-
-static inline bool intel_pmu_has_bts(struct perf_event *event)
-{
-       if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-           !event->attr.freq && event->hw.sample_period == 1)
-               return true;
-
-       return false;
-}
-
-int intel_pmu_save_and_restart(struct perf_event *event);
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event);
-
-struct intel_shared_regs *allocate_shared_regs(int cpu);
-
-int intel_pmu_init(void);
-
-void init_debug_store_on_cpu(int cpu);
-
-void fini_debug_store_on_cpu(int cpu);
-
-void release_ds_buffers(void);
-
-void reserve_ds_buffers(void);
-
-extern struct event_constraint bts_constraint;
-
-void intel_pmu_enable_bts(u64 config);
-
-void intel_pmu_disable_bts(void);
-
-int intel_pmu_drain_bts_buffer(void);
-
-extern struct event_constraint intel_core2_pebs_event_constraints[];
-
-extern struct event_constraint intel_atom_pebs_event_constraints[];
-
-extern struct event_constraint intel_slm_pebs_event_constraints[];
-
-extern struct event_constraint intel_nehalem_pebs_event_constraints[];
-
-extern struct event_constraint intel_westmere_pebs_event_constraints[];
-
-extern struct event_constraint intel_snb_pebs_event_constraints[];
-
-extern struct event_constraint intel_ivb_pebs_event_constraints[];
-
-extern struct event_constraint intel_hsw_pebs_event_constraints[];
-
-extern struct event_constraint intel_skl_pebs_event_constraints[];
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event);
-
-void intel_pmu_pebs_enable(struct perf_event *event);
-
-void intel_pmu_pebs_disable(struct perf_event *event);
-
-void intel_pmu_pebs_enable_all(void);
-
-void intel_pmu_pebs_disable_all(void);
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_ds_init(void);
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
-
-void intel_pmu_lbr_reset(void);
-
-void intel_pmu_lbr_enable(struct perf_event *event);
-
-void intel_pmu_lbr_disable(struct perf_event *event);
-
-void intel_pmu_lbr_enable_all(bool pmi);
-
-void intel_pmu_lbr_disable_all(void);
-
-void intel_pmu_lbr_read(void);
-
-void intel_pmu_lbr_init_core(void);
-
-void intel_pmu_lbr_init_nhm(void);
-
-void intel_pmu_lbr_init_atom(void);
-
-void intel_pmu_lbr_init_snb(void);
-
-void intel_pmu_lbr_init_hsw(void);
-
-void intel_pmu_lbr_init_skl(void);
-
-void intel_pmu_lbr_init_knl(void);
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event);
-
-void intel_pt_interrupt(void);
-
-int intel_bts_interrupt(void);
-
-void intel_bts_enable_local(void);
-
-void intel_bts_disable_local(void);
-
-int p4_pmu_init(void);
-
-int p6_pmu_init(void);
-
-int knc_pmu_init(void);
-
-ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
-                         char *page);
-
-static inline int is_ht_workaround_enabled(void)
-{
-       return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
-}
-
-#else /* CONFIG_CPU_SUP_INTEL */
-
-static inline void reserve_ds_buffers(void)
-{
-}
-
-static inline void release_ds_buffers(void)
-{
-}
-
-static inline int intel_pmu_init(void)
-{
-       return 0;
-}
-
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
-       return NULL;
-}
-
-static inline int is_ht_workaround_enabled(void)
-{
-       return 0;
-}
-#endif /* CONFIG_CPU_SUP_INTEL */
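
As a reading aid for the header removed above, the following is a minimal standalone sketch (illustration only; the demo_* names are hypothetical, not kernel identifiers) of how the x86_pmu_config bitfield union and the X86_CONFIG() compound-literal macro pack an event select, unit mask and privilege bits into a single raw config word:

/*
 * Standalone illustration, compilable with any C99 compiler. It mirrors
 * the union x86_pmu_config layout from the removed header; DEMO_CONFIG()
 * plays the role of X86_CONFIG().
 */
#include <stdio.h>
#include <stdint.h>

union demo_pmu_config {
        struct {
                uint64_t event:8, umask:8, usr:1, os:1, edge:1, pc:1,
                         interrupt:1, __reserved1:1, en:1, inv:1, cmask:8,
                         event2:4, __reserved2:4, go:1, ho:1;
        } bits;
        uint64_t value;
};

#define DEMO_CONFIG(...) \
        ((union demo_pmu_config){ .bits = { __VA_ARGS__ } }).value

int main(void)
{
        /* e.g. event select 0xc0 (retired instructions), counted in user and OS mode */
        uint64_t cfg = DEMO_CONFIG(.event = 0xc0, .umask = 0x00, .usr = 1, .os = 1);

        printf("raw config = %#llx\n", (unsigned long long)cfg);
        return 0;
}

The same compound-literal trick is what lets callers of the real X86_CONFIG() write designated initializers inline, with the resulting u64 used wherever a raw event-select value is needed (for instance hwc->config).
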
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
deleted file mode 100644 (file)
index 5861053..0000000
+++ /dev/null
@@ -1,731 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/apicdef.h>
-
-#include "perf_event.h"
-
-static __initconst const u64 amd_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
-               [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
-               [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
-               [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-               [ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
-               [ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
-               [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
-               [ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-/*
- * AMD Performance Monitor K7 and later.
- */
-static const u64 amd_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]                   = 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]                 = 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]          = 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]                        = 0x00c3,
-  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]      = 0x00d0, /* "Decoder empty" event */
-  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]       = 0x00d1, /* "Dispatch stalls" event */
-};
-
-static u64 amd_pmu_event_map(int hw_event)
-{
-       return amd_perfmon_event_map[hw_event];
-}
-
-/*
- * Previously calculated offsets
- */
-static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
-static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
-
-/*
- * Legacy CPUs:
- *   4 counters starting at 0xc0010000 each offset by 1
- *
- * CPUs with core performance counter extensions:
- *   6 counters starting at 0xc0010200 each offset by 2
- */
-static inline int amd_pmu_addr_offset(int index, bool eventsel)
-{
-       int offset;
-
-       if (!index)
-               return index;
-
-       if (eventsel)
-               offset = event_offsets[index];
-       else
-               offset = count_offsets[index];
-
-       if (offset)
-               return offset;
-
-       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-               offset = index;
-       else
-               offset = index << 1;
-
-       if (eventsel)
-               event_offsets[index] = offset;
-       else
-               count_offsets[index] = offset;
-
-       return offset;
-}
-
-static int amd_core_hw_config(struct perf_event *event)
-{
-       if (event->attr.exclude_host && event->attr.exclude_guest)
-               /*
-                * When HO == GO == 1 the hardware treats that as GO == HO == 0
-                * and will count in both modes. We don't want to count in that
-                * case so we emulate no-counting by setting US = OS = 0.
-                */
-               event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
-                                     ARCH_PERFMON_EVENTSEL_OS);
-       else if (event->attr.exclude_host)
-               event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
-       else if (event->attr.exclude_guest)
-               event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
-
-       return 0;
-}
-
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
-{
-       return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
-}
-
-static inline int amd_is_nb_event(struct hw_perf_event *hwc)
-{
-       return (hwc->config & 0xe0) == 0xe0;
-}
-
-static inline int amd_has_nb(struct cpu_hw_events *cpuc)
-{
-       struct amd_nb *nb = cpuc->amd_nb;
-
-       return nb && nb->nb_id != -1;
-}
-
-static int amd_pmu_hw_config(struct perf_event *event)
-{
-       int ret;
-
-       /* pass precise event sampling to ibs: */
-       if (event->attr.precise_ip && get_ibs_caps())
-               return -ENOENT;
-
-       if (has_branch_stack(event))
-               return -EOPNOTSUPP;
-
-       ret = x86_pmu_hw_config(event);
-       if (ret)
-               return ret;
-
-       if (event->attr.type == PERF_TYPE_RAW)
-               event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
-
-       return amd_core_hw_config(event);
-}
-
-static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
-                                          struct perf_event *event)
-{
-       struct amd_nb *nb = cpuc->amd_nb;
-       int i;
-
-       /*
-        * need to scan whole list because event may not have
-        * been assigned during scheduling
-        *
-        * no race condition possible because event can only
-        * be removed on one CPU at a time AND PMU is disabled
-        * when we come here
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               if (cmpxchg(nb->owners + i, event, NULL) == event)
-                       break;
-       }
-}
-
- /*
-  * AMD64 NorthBridge events need special treatment because
-  * counter access needs to be synchronized across all cores
-  * of a package. Refer to BKDG section 3.12
-  *
-  * NB events are events measuring L3 cache, Hypertransport
-  * traffic. They are identified by an event code >= 0xe00.
-  * They measure events on the NorthBridge which is shared
-  * by all cores on a package. NB events are counted on a
-  * shared set of counters. When a NB event is programmed
-  * in a counter, the data actually comes from a shared
-  * counter. Thus, access to those counters needs to be
-  * synchronized.
-  *
-  * We implement the synchronization such that no two cores
-  * can be measuring NB events using the same counters. Thus,
-  * we maintain a per-NB allocation table. The available slot
-  * is propagated using the event_constraint structure.
-  *
-  * We provide only one choice for each NB event based on
-  * the fact that only NB events have restrictions. Consequently,
-  * if a counter is available, there is a guarantee the NB event
-  * will be assigned to it. If no slot is available, an empty
-  * constraint is returned and scheduling will eventually fail
-  * for this event.
-  *
-  * Note that all cores attached to the same NB compete for the same
-  * counters to host NB events; this is why we use atomic ops. Some
-  * multi-chip CPUs may have more than one NB.
-  *
-  * Given that resources are allocated (cmpxchg), they must be
-  * eventually freed for others to use. This is accomplished by
-  * calling __amd_put_nb_event_constraints()
-  *
-  * Non NB events are not impacted by this restriction.
-  */
-static struct event_constraint *
-__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
-                              struct event_constraint *c)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct amd_nb *nb = cpuc->amd_nb;
-       struct perf_event *old;
-       int idx, new = -1;
-
-       if (!c)
-               c = &unconstrained;
-
-       if (cpuc->is_fake)
-               return c;
-
-       /*
-        * detect if already present, if so reuse
-        *
-        * cannot merge with actual allocation
-        * because of possible holes
-        *
-        * event can already be present yet not assigned (in hwc->idx)
-        * because of successive calls to x86_schedule_events() from
-        * hw_perf_group_sched_in() without hw_perf_enable()
-        */
-       for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
-               if (new == -1 || hwc->idx == idx)
-                       /* assign free slot, prefer hwc->idx */
-                       old = cmpxchg(nb->owners + idx, NULL, event);
-               else if (nb->owners[idx] == event)
-                       /* event already present */
-                       old = event;
-               else
-                       continue;
-
-               if (old && old != event)
-                       continue;
-
-               /* reassign to this slot */
-               if (new != -1)
-                       cmpxchg(nb->owners + new, event, NULL);
-               new = idx;
-
-               /* already present, reuse */
-               if (old == event)
-                       break;
-       }
-
-       if (new == -1)
-               return &emptyconstraint;
-
-       return &nb->event_constraints[new];
-}
-
-static struct amd_nb *amd_alloc_nb(int cpu)
-{
-       struct amd_nb *nb;
-       int i;
-
-       nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
-       if (!nb)
-               return NULL;
-
-       nb->nb_id = -1;
-
-       /*
-        * initialize all possible NB constraints
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               __set_bit(i, nb->event_constraints[i].idxmsk);
-               nb->event_constraints[i].weight = 1;
-       }
-       return nb;
-}
-
-static int amd_pmu_cpu_prepare(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       WARN_ON_ONCE(cpuc->amd_nb);
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return NOTIFY_OK;
-
-       cpuc->amd_nb = amd_alloc_nb(cpu);
-       if (!cpuc->amd_nb)
-               return NOTIFY_BAD;
-
-       return NOTIFY_OK;
-}
-
-static void amd_pmu_cpu_starting(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
-       struct amd_nb *nb;
-       int i, nb_id;
-
-       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return;
-
-       nb_id = amd_get_nb_id(cpu);
-       WARN_ON_ONCE(nb_id == BAD_APICID);
-
-       for_each_online_cpu(i) {
-               nb = per_cpu(cpu_hw_events, i).amd_nb;
-               if (WARN_ON_ONCE(!nb))
-                       continue;
-
-               if (nb->nb_id == nb_id) {
-                       *onln = cpuc->amd_nb;
-                       cpuc->amd_nb = nb;
-                       break;
-               }
-       }
-
-       cpuc->amd_nb->nb_id = nb_id;
-       cpuc->amd_nb->refcnt++;
-}
-
-static void amd_pmu_cpu_dead(int cpu)
-{
-       struct cpu_hw_events *cpuhw;
-
-       if (boot_cpu_data.x86_max_cores < 2)
-               return;
-
-       cpuhw = &per_cpu(cpu_hw_events, cpu);
-
-       if (cpuhw->amd_nb) {
-               struct amd_nb *nb = cpuhw->amd_nb;
-
-               if (nb->nb_id == -1 || --nb->refcnt == 0)
-                       kfree(nb);
-
-               cpuhw->amd_nb = NULL;
-       }
-}
-
-static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       /*
-        * if not NB event or no NB, then no constraints
-        */
-       if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
-               return &unconstrained;
-
-       return __amd_get_nb_event_constraints(cpuc, event, NULL);
-}
-
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
-                                     struct perf_event *event)
-{
-       if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
-               __amd_put_nb_event_constraints(cpuc, event);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *amd_format_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-/* AMD Family 15h */
-
-#define AMD_EVENT_TYPE_MASK    0x000000F0ULL
-
-#define AMD_EVENT_FP           0x00000000ULL ... 0x00000010ULL
-#define AMD_EVENT_LS           0x00000020ULL ... 0x00000030ULL
-#define AMD_EVENT_DC           0x00000040ULL ... 0x00000050ULL
-#define AMD_EVENT_CU           0x00000060ULL ... 0x00000070ULL
-#define AMD_EVENT_IC_DE                0x00000080ULL ... 0x00000090ULL
-#define AMD_EVENT_EX_LS                0x000000C0ULL
-#define AMD_EVENT_DE           0x000000D0ULL
-#define AMD_EVENT_NB           0x000000E0ULL ... 0x000000F0ULL
-
-/*
- * AMD family 15h event code/PMC mappings:
- *
- * type = event_code & 0x0F0:
- *
- * 0x000       FP      PERF_CTL[5:3]
- * 0x010       FP      PERF_CTL[5:3]
- * 0x020       LS      PERF_CTL[5:0]
- * 0x030       LS      PERF_CTL[5:0]
- * 0x040       DC      PERF_CTL[5:0]
- * 0x050       DC      PERF_CTL[5:0]
- * 0x060       CU      PERF_CTL[2:0]
- * 0x070       CU      PERF_CTL[2:0]
- * 0x080       IC/DE   PERF_CTL[2:0]
- * 0x090       IC/DE   PERF_CTL[2:0]
- * 0x0A0       ---
- * 0x0B0       ---
- * 0x0C0       EX/LS   PERF_CTL[5:0]
- * 0x0D0       DE      PERF_CTL[2:0]
- * 0x0E0       NB      NB_PERF_CTL[3:0]
- * 0x0F0       NB      NB_PERF_CTL[3:0]
- *
- * Exceptions:
- *
- * 0x000       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x003       FP      PERF_CTL[3]
- * 0x004       FP      PERF_CTL[3], PERF_CTL[5:3] (*)
- * 0x00B       FP      PERF_CTL[3]
- * 0x00D       FP      PERF_CTL[3]
- * 0x023       DE      PERF_CTL[2:0]
- * 0x02D       LS      PERF_CTL[3]
- * 0x02E       LS      PERF_CTL[3,0]
- * 0x031       LS      PERF_CTL[2:0] (**)
- * 0x043       CU      PERF_CTL[2:0]
- * 0x045       CU      PERF_CTL[2:0]
- * 0x046       CU      PERF_CTL[2:0]
- * 0x054       CU      PERF_CTL[2:0]
- * 0x055       CU      PERF_CTL[2:0]
- * 0x08F       IC      PERF_CTL[0]
- * 0x187       DE      PERF_CTL[0]
- * 0x188       DE      PERF_CTL[0]
- * 0x0DB       EX      PERF_CTL[5:0]
- * 0x0DC       LS      PERF_CTL[5:0]
- * 0x0DD       LS      PERF_CTL[5:0]
- * 0x0DE       LS      PERF_CTL[5:0]
- * 0x0DF       LS      PERF_CTL[5:0]
- * 0x1C0       EX      PERF_CTL[5:3]
- * 0x1D6       EX      PERF_CTL[5:0]
- * 0x1D8       EX      PERF_CTL[5:0]
- *
- * (*)  depending on the umask all FPU counters may be used
- * (**) only one unitmask enabled at a time
- */
-
-static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
-static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
-static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
-static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
-static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
-
-static struct event_constraint *
-amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
-                              struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int event_code = amd_get_event_code(hwc);
-
-       switch (event_code & AMD_EVENT_TYPE_MASK) {
-       case AMD_EVENT_FP:
-               switch (event_code) {
-               case 0x000:
-                       if (!(hwc->config & 0x0000F000ULL))
-                               break;
-                       if (!(hwc->config & 0x00000F00ULL))
-                               break;
-                       return &amd_f15_PMC3;
-               case 0x004:
-                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
-                               break;
-                       return &amd_f15_PMC3;
-               case 0x003:
-               case 0x00B:
-               case 0x00D:
-                       return &amd_f15_PMC3;
-               }
-               return &amd_f15_PMC53;
-       case AMD_EVENT_LS:
-       case AMD_EVENT_DC:
-       case AMD_EVENT_EX_LS:
-               switch (event_code) {
-               case 0x023:
-               case 0x043:
-               case 0x045:
-               case 0x046:
-               case 0x054:
-               case 0x055:
-                       return &amd_f15_PMC20;
-               case 0x02D:
-                       return &amd_f15_PMC3;
-               case 0x02E:
-                       return &amd_f15_PMC30;
-               case 0x031:
-                       if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
-                               return &amd_f15_PMC20;
-                       return &emptyconstraint;
-               case 0x1C0:
-                       return &amd_f15_PMC53;
-               default:
-                       return &amd_f15_PMC50;
-               }
-       case AMD_EVENT_CU:
-       case AMD_EVENT_IC_DE:
-       case AMD_EVENT_DE:
-               switch (event_code) {
-               case 0x08F:
-               case 0x187:
-               case 0x188:
-                       return &amd_f15_PMC0;
-               case 0x0DB ... 0x0DF:
-               case 0x1D6:
-               case 0x1D8:
-                       return &amd_f15_PMC50;
-               default:
-                       return &amd_f15_PMC20;
-               }
-       case AMD_EVENT_NB:
-               /* moved to perf_event_amd_uncore.c */
-               return &emptyconstraint;
-       default:
-               return &emptyconstraint;
-       }
-}
-
-static ssize_t amd_event_sysfs_show(char *page, u64 config)
-{
-       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
-                   (config & AMD64_EVENTSEL_EVENT) >> 24;
-
-       return x86_event_sysfs_show(page, config, event);
-}
-
-static __initconst const struct x86_pmu amd_pmu = {
-       .name                   = "AMD",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = x86_pmu_enable_all,
-       .enable                 = x86_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = amd_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_K7_EVNTSEL0,
-       .perfctr                = MSR_K7_PERFCTR0,
-       .addr_offset            = amd_pmu_addr_offset,
-       .event_map              = amd_pmu_event_map,
-       .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
-       .num_counters           = AMD64_NUM_COUNTERS,
-       .cntval_bits            = 48,
-       .cntval_mask            = (1ULL << 48) - 1,
-       .apic                   = 1,
-       /* use highest bit to detect overflow */
-       .max_period             = (1ULL << 47) - 1,
-       .get_event_constraints  = amd_get_event_constraints,
-       .put_event_constraints  = amd_put_event_constraints,
-
-       .format_attrs           = amd_format_attr,
-       .events_sysfs_show      = amd_event_sysfs_show,
-
-       .cpu_prepare            = amd_pmu_cpu_prepare,
-       .cpu_starting           = amd_pmu_cpu_starting,
-       .cpu_dead               = amd_pmu_cpu_dead,
-};
-
-static int __init amd_core_pmu_init(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-               return 0;
-
-       switch (boot_cpu_data.x86) {
-       case 0x15:
-               pr_cont("Fam15h ");
-               x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-               break;
-
-       default:
-               pr_err("core perfctr but no constraints; unknown hardware!\n");
-               return -ENODEV;
-       }
-
-       /*
-        * If core performance counter extensions exist, we must use
-        * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-        * amd_pmu_addr_offset().
-        */
-       x86_pmu.eventsel        = MSR_F15H_PERF_CTL;
-       x86_pmu.perfctr         = MSR_F15H_PERF_CTR;
-       x86_pmu.num_counters    = AMD64_NUM_COUNTERS_CORE;
-
-       pr_cont("core perfctr, ");
-       return 0;
-}
-
-__init int amd_pmu_init(void)
-{
-       int ret;
-
-       /* Performance-monitoring supported from K7 and later: */
-       if (boot_cpu_data.x86 < 6)
-               return -ENODEV;
-
-       x86_pmu = amd_pmu;
-
-       ret = amd_core_pmu_init();
-       if (ret)
-               return ret;
-
-       /* Events are common for all AMDs */
-       memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
-              sizeof(hw_cache_event_ids));
-
-       return 0;
-}
-
-void amd_pmu_enable_virt(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       cpuc->perf_ctr_virt_mask = 0;
-
-       /* Reload all events */
-       x86_pmu_disable_all();
-       x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
-
-void amd_pmu_disable_virt(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * We only mask out the Host-only bit so that host-only counting works
-        * when SVM is disabled. If someone sets up a guest-only counter when
-        * SVM is disabled, the Guest-only bits still get set and the counter
-        * will not count anything.
-        */
-       cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
-
-       /* Reload all events */
-       x86_pmu_disable_all();
-       x86_pmu_enable_all(0);
-}
-EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
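
Before the IBS driver diff below, a compact way to see the counter layout that amd_pmu_addr_offset() and amd_core_pmu_init() above switch between is this standalone sketch (illustration only; demo_eventsel_addr() is a hypothetical helper, and the MSR base values are the ones quoted in the removed comments):

/*
 * Standalone illustration: legacy CPUs expose 4 event-select MSRs
 * contiguously from 0xc0010000, while CPUs with the core performance
 * counter extensions expose 6 from 0xc0010200 spaced two apart because
 * the CTL/CTR registers are interleaved.
 */
#include <stdio.h>
#include <stdbool.h>

#define MSR_K7_EVNTSEL0    0xc0010000u
#define MSR_F15H_PERF_CTL  0xc0010200u

static unsigned int demo_eventsel_addr(int index, bool has_perfctr_core)
{
        unsigned int base   = has_perfctr_core ? MSR_F15H_PERF_CTL : MSR_K7_EVNTSEL0;
        unsigned int offset = has_perfctr_core ? index << 1 : index;

        return base + offset;
}

int main(void)
{
        int i;

        for (i = 0; i < 4; i++)
                printf("legacy PERF_CTL%d -> %#x\n", i, demo_eventsel_addr(i, false));
        for (i = 0; i < 6; i++)
                printf("fam15h PERF_CTL%d -> %#x\n", i, demo_eventsel_addr(i, true));
        return 0;
}

Doubling the offset (index << 1) mirrors what the removed amd_pmu_addr_offset() does once X86_FEATURE_PERFCTR_CORE is detected, which is also why amd_core_pmu_init() rebases eventsel/perfctr to the MSR_F15H_* pair.
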
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
deleted file mode 100644 (file)
index 989d3c2..0000000
+++ /dev/null
@@ -1,959 +0,0 @@
-/*
- * Performance events - AMD IBS
- *
- *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ptrace.h>
-#include <linux/syscore_ops.h>
-
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-static u32 ibs_caps;
-
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
-
-#include <linux/kprobes.h>
-#include <linux/hardirq.h>
-
-#include <asm/nmi.h>
-
-#define IBS_FETCH_CONFIG_MASK  (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
-#define IBS_OP_CONFIG_MASK     IBS_OP_MAX_CNT
-
-enum ibs_states {
-       IBS_ENABLED     = 0,
-       IBS_STARTED     = 1,
-       IBS_STOPPING    = 2,
-
-       IBS_MAX_STATES,
-};
-
-struct cpu_perf_ibs {
-       struct perf_event       *event;
-       unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
-};
-
-struct perf_ibs {
-       struct pmu                      pmu;
-       unsigned int                    msr;
-       u64                             config_mask;
-       u64                             cnt_mask;
-       u64                             enable_mask;
-       u64                             valid_mask;
-       u64                             max_period;
-       unsigned long                   offset_mask[1];
-       int                             offset_max;
-       struct cpu_perf_ibs __percpu    *pcpu;
-
-       struct attribute                **format_attrs;
-       struct attribute_group          format_group;
-       const struct attribute_group    *attr_groups[2];
-
-       u64                             (*get_count)(u64 config);
-};
-
-struct perf_ibs_data {
-       u32             size;
-       union {
-               u32     data[0];        /* data buffer starts here */
-               u32     caps;
-       };
-       u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
-};
-
-static int
-perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
-{
-       s64 left = local64_read(&hwc->period_left);
-       s64 period = hwc->sample_period;
-       int overflow = 0;
-
-       /*
-        * If we are way outside a reasonable range then just skip forward:
-        */
-       if (unlikely(left <= -period)) {
-               left = period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               overflow = 1;
-       }
-
-       if (unlikely(left < (s64)min)) {
-               left += period;
-               local64_set(&hwc->period_left, left);
-               hwc->last_period = period;
-               overflow = 1;
-       }
-
-       /*
-        * If the hw period that triggers the sw overflow is too short
-        * we might hit the irq handler. This biases the results.
-        * Thus we shorten the next-to-last period and set the last
-        * period to the max period.
-        */
-       if (left > max) {
-               left -= max;
-               if (left > max)
-                       left = max;
-               else if (left < min)
-                       left = min;
-       }
-
-       *hw_period = (u64)left;
-
-       return overflow;
-}
-
-static  int
-perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int shift = 64 - width;
-       u64 prev_raw_count;
-       u64 delta;
-
-       /*
-        * Careful: an NMI might modify the previous event value.
-        *
-        * Our tactic to handle this is to first atomically read and
-        * exchange a new raw count - then add that new-prev delta
-        * count to the generic event atomically:
-        */
-       prev_raw_count = local64_read(&hwc->prev_count);
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
-               return 0;
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-       local64_sub(delta, &hwc->period_left);
-
-       return 1;
-}
-
-static struct perf_ibs perf_ibs_fetch;
-static struct perf_ibs perf_ibs_op;
-
-static struct perf_ibs *get_ibs_pmu(int type)
-{
-       if (perf_ibs_fetch.pmu.type == type)
-               return &perf_ibs_fetch;
-       if (perf_ibs_op.pmu.type == type)
-               return &perf_ibs_op;
-       return NULL;
-}
-
-/*
- * Use IBS for precise event sampling:
- *
- *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
- *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
- *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
- *
- * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
- * MSRC001_1033) is used to select either cycle or micro-ops counting
- * mode.
- *
- * The rip of IBS samples has skid 0. Thus, IBS supports precise
- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
- * rip is invalid when IBS was not able to record the rip correctly.
- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
- *
- */
-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
-{
-       switch (event->attr.precise_ip) {
-       case 0:
-               return -ENOENT;
-       case 1:
-       case 2:
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       switch (event->attr.type) {
-       case PERF_TYPE_HARDWARE:
-               switch (event->attr.config) {
-               case PERF_COUNT_HW_CPU_CYCLES:
-                       *config = 0;
-                       return 0;
-               }
-               break;
-       case PERF_TYPE_RAW:
-               switch (event->attr.config) {
-               case 0x0076:
-                       *config = 0;
-                       return 0;
-               case 0x00C1:
-                       *config = IBS_OP_CNT_CTL;
-                       return 0;
-               }
-               break;
-       default:
-               return -ENOENT;
-       }
-
-       return -EOPNOTSUPP;
-}
-
-static const struct perf_event_attr ibs_notsupp = {
-       .exclude_user   = 1,
-       .exclude_kernel = 1,
-       .exclude_hv     = 1,
-       .exclude_idle   = 1,
-       .exclude_host   = 1,
-       .exclude_guest  = 1,
-};
-
-static int perf_ibs_init(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs;
-       u64 max_cnt, config;
-       int ret;
-
-       perf_ibs = get_ibs_pmu(event->attr.type);
-       if (perf_ibs) {
-               config = event->attr.config;
-       } else {
-               perf_ibs = &perf_ibs_op;
-               ret = perf_ibs_precise_event(event, &config);
-               if (ret)
-                       return ret;
-       }
-
-       if (event->pmu != &perf_ibs->pmu)
-               return -ENOENT;
-
-       if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
-               return -EINVAL;
-
-       if (config & ~perf_ibs->config_mask)
-               return -EINVAL;
-
-       if (hwc->sample_period) {
-               if (config & perf_ibs->cnt_mask)
-                       /* raw max_cnt may not be set */
-                       return -EINVAL;
-               if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
-                       /*
-                        * lower 4 bits cannot be set in ibs max cnt,
-                        * but we allow it in case we adjust the
-                        * sample period to set a frequency.
-                        */
-                       return -EINVAL;
-               hwc->sample_period &= ~0x0FULL;
-               if (!hwc->sample_period)
-                       hwc->sample_period = 0x10;
-       } else {
-               max_cnt = config & perf_ibs->cnt_mask;
-               config &= ~perf_ibs->cnt_mask;
-               event->attr.sample_period = max_cnt << 4;
-               hwc->sample_period = event->attr.sample_period;
-       }
-
-       if (!hwc->sample_period)
-               return -EINVAL;
-
-       /*
-        * If we modify hwc->sample_period, we also need to update
-        * hwc->last_period and hwc->period_left.
-        */
-       hwc->last_period = hwc->sample_period;
-       local64_set(&hwc->period_left, hwc->sample_period);
-
-       hwc->config_base = perf_ibs->msr;
-       hwc->config = config;
-
-       return 0;
-}
-
-static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
-                              struct hw_perf_event *hwc, u64 *period)
-{
-       int overflow;
-
-       /* ignore lower 4 bits in min count: */
-       overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
-       local64_set(&hwc->prev_count, 0);
-
-       return overflow;
-}
-
-static u64 get_ibs_fetch_count(u64 config)
-{
-       return (config & IBS_FETCH_CNT) >> 12;
-}
-
-static u64 get_ibs_op_count(u64 config)
-{
-       u64 count = 0;
-
-       if (config & IBS_OP_VAL)
-               count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
-
-       if (ibs_caps & IBS_CAPS_RDWROPCNT)
-               count += (config & IBS_OP_CUR_CNT) >> 32;
-
-       return count;
-}
-
-static void
-perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
-                     u64 *config)
-{
-       u64 count = perf_ibs->get_count(*config);
-
-       /*
-        * Set width to 64 since we do not overflow on max width but
-        * instead on max count. In perf_ibs_set_period() we clear
-        * prev count manually on overflow.
-        */
-       while (!perf_event_try_update(event, count, 64)) {
-               rdmsrl(event->hw.config_base, *config);
-               count = perf_ibs->get_count(*config);
-       }
-}
-
-static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
-                                        struct hw_perf_event *hwc, u64 config)
-{
-       wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
-}
-
-/*
- * Erratum #420 Instruction-Based Sampling Engine May Generate
- * Interrupt that Cannot Be Cleared:
- *
- * Must clear counter mask first, then clear the enable bit. See
- * Revision Guide for AMD Family 10h Processors, Publication #41322.
- */
-static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
-                                         struct hw_perf_event *hwc, u64 config)
-{
-       config &= ~perf_ibs->cnt_mask;
-       wrmsrl(hwc->config_base, config);
-       config &= ~perf_ibs->enable_mask;
-       wrmsrl(hwc->config_base, config);
-}
-
-/*
- * We cannot restore the ibs pmu state, so we always need to update
- * the event while stopping it and then reset the state when starting
- * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
- * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
- */
-static void perf_ibs_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       u64 period;
-
-       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
-               return;
-
-       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-       hwc->state = 0;
-
-       perf_ibs_set_period(perf_ibs, hwc, &period);
-       set_bit(IBS_STARTED, pcpu->state);
-       perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
-       perf_event_update_userpage(event);
-}
-
-static void perf_ibs_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       u64 config;
-       int stopping;
-
-       stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
-
-       if (!stopping && (hwc->state & PERF_HES_UPTODATE))
-               return;
-
-       rdmsrl(hwc->config_base, config);
-
-       if (stopping) {
-               set_bit(IBS_STOPPING, pcpu->state);
-               perf_ibs_disable_event(perf_ibs, hwc, config);
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       /*
-        * Clear valid bit to not count rollovers on update; rollovers
-        * are only updated in the irq handler.
-        */
-       config &= ~perf_ibs->valid_mask;
-
-       perf_ibs_event_update(perf_ibs, event, &config);
-       hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_ibs_add(struct perf_event *event, int flags)
-{
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
-       if (test_and_set_bit(IBS_ENABLED, pcpu->state))
-               return -ENOSPC;
-
-       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       pcpu->event = event;
-
-       if (flags & PERF_EF_START)
-               perf_ibs_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void perf_ibs_del(struct perf_event *event, int flags)
-{
-       struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-
-       if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
-               return;
-
-       perf_ibs_stop(event, PERF_EF_UPDATE);
-
-       pcpu->event = NULL;
-
-       perf_event_update_userpage(event);
-}
-
-static void perf_ibs_read(struct perf_event *event) { }
-
-PMU_FORMAT_ATTR(rand_en,       "config:57");
-PMU_FORMAT_ATTR(cnt_ctl,       "config:19");
-
-static struct attribute *ibs_fetch_format_attrs[] = {
-       &format_attr_rand_en.attr,
-       NULL,
-};
-
-static struct attribute *ibs_op_format_attrs[] = {
-       NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
-       NULL,
-};
-
-static struct perf_ibs perf_ibs_fetch = {
-       .pmu = {
-               .task_ctx_nr    = perf_invalid_context,
-
-               .event_init     = perf_ibs_init,
-               .add            = perf_ibs_add,
-               .del            = perf_ibs_del,
-               .start          = perf_ibs_start,
-               .stop           = perf_ibs_stop,
-               .read           = perf_ibs_read,
-       },
-       .msr                    = MSR_AMD64_IBSFETCHCTL,
-       .config_mask            = IBS_FETCH_CONFIG_MASK,
-       .cnt_mask               = IBS_FETCH_MAX_CNT,
-       .enable_mask            = IBS_FETCH_ENABLE,
-       .valid_mask             = IBS_FETCH_VAL,
-       .max_period             = IBS_FETCH_MAX_CNT << 4,
-       .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
-       .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
-       .format_attrs           = ibs_fetch_format_attrs,
-
-       .get_count              = get_ibs_fetch_count,
-};
-
-static struct perf_ibs perf_ibs_op = {
-       .pmu = {
-               .task_ctx_nr    = perf_invalid_context,
-
-               .event_init     = perf_ibs_init,
-               .add            = perf_ibs_add,
-               .del            = perf_ibs_del,
-               .start          = perf_ibs_start,
-               .stop           = perf_ibs_stop,
-               .read           = perf_ibs_read,
-       },
-       .msr                    = MSR_AMD64_IBSOPCTL,
-       .config_mask            = IBS_OP_CONFIG_MASK,
-       .cnt_mask               = IBS_OP_MAX_CNT,
-       .enable_mask            = IBS_OP_ENABLE,
-       .valid_mask             = IBS_OP_VAL,
-       .max_period             = IBS_OP_MAX_CNT << 4,
-       .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
-       .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
-       .format_attrs           = ibs_op_format_attrs,
-
-       .get_count              = get_ibs_op_count,
-};
-
-static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
-{
-       struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
-       struct perf_event *event = pcpu->event;
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_sample_data data;
-       struct perf_raw_record raw;
-       struct pt_regs regs;
-       struct perf_ibs_data ibs_data;
-       int offset, size, check_rip, offset_max, throttle = 0;
-       unsigned int msr;
-       u64 *buf, *config, period;
-
-       if (!test_bit(IBS_STARTED, pcpu->state)) {
-               /*
-                * Catch spurious interrupts after stopping IBS: After
-                * disabling IBS there could still be incoming NMIs
-                * with samples that even have the valid bit cleared.
-                * Mark all these NMIs as handled.
-                */
-               return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
-       }
-
-       msr = hwc->config_base;
-       buf = ibs_data.regs;
-       rdmsrl(msr, *buf);
-       if (!(*buf++ & perf_ibs->valid_mask))
-               return 0;
-
-       config = &ibs_data.regs[0];
-       perf_ibs_event_update(perf_ibs, event, config);
-       perf_sample_data_init(&data, 0, hwc->last_period);
-       if (!perf_ibs_set_period(perf_ibs, hwc, &period))
-               goto out;       /* no sw counter overflow */
-
-       ibs_data.caps = ibs_caps;
-       size = 1;
-       offset = 1;
-       check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
-       if (event->attr.sample_type & PERF_SAMPLE_RAW)
-               offset_max = perf_ibs->offset_max;
-       else if (check_rip)
-               offset_max = 2;
-       else
-               offset_max = 1;
-       do {
-               rdmsrl(msr + offset, *buf++);
-               size++;
-               offset = find_next_bit(perf_ibs->offset_mask,
-                                      perf_ibs->offset_max,
-                                      offset + 1);
-       } while (offset < offset_max);
-       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-               /*
-                * Read IbsBrTarget and IbsOpData4 separately
-                * depending on their availability.
-                * Can't add to offset_max as they are staggered
-                */
-               if (ibs_caps & IBS_CAPS_BRNTRGT) {
-                       rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
-                       size++;
-               }
-               if (ibs_caps & IBS_CAPS_OPDATA4) {
-                       rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
-                       size++;
-               }
-       }
-       ibs_data.size = sizeof(u64) * size;
-
-       regs = *iregs;
-       if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
-               regs.flags &= ~PERF_EFLAGS_EXACT;
-       } else {
-               set_linear_ip(&regs, ibs_data.regs[1]);
-               regs.flags |= PERF_EFLAGS_EXACT;
-       }
-
-       if (event->attr.sample_type & PERF_SAMPLE_RAW) {
-               raw.size = sizeof(u32) + ibs_data.size;
-               raw.data = ibs_data.data;
-               data.raw = &raw;
-       }
-
-       throttle = perf_event_overflow(event, &data, &regs);
-out:
-       if (throttle)
-               perf_ibs_disable_event(perf_ibs, hwc, *config);
-       else
-               perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
-
-       perf_event_update_userpage(event);
-
-       return 1;
-}
-
-static int
-perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
-{
-       int handled = 0;
-
-       handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
-       handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       return handled;
-}
-NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
-
-static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
-{
-       struct cpu_perf_ibs __percpu *pcpu;
-       int ret;
-
-       pcpu = alloc_percpu(struct cpu_perf_ibs);
-       if (!pcpu)
-               return -ENOMEM;
-
-       perf_ibs->pcpu = pcpu;
-
-       /* register attributes */
-       if (perf_ibs->format_attrs[0]) {
-               memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
-               perf_ibs->format_group.name     = "format";
-               perf_ibs->format_group.attrs    = perf_ibs->format_attrs;
-
-               memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
-               perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
-               perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
-       }
-
-       ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
-       if (ret) {
-               perf_ibs->pcpu = NULL;
-               free_percpu(pcpu);
-       }
-
-       return ret;
-}
-
-static __init int perf_event_ibs_init(void)
-{
-       struct attribute **attr = ibs_op_format_attrs;
-
-       if (!ibs_caps)
-               return -ENODEV; /* ibs not supported by the cpu */
-
-       perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-
-       if (ibs_caps & IBS_CAPS_OPCNT) {
-               perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
-               *attr++ = &format_attr_cnt_ctl.attr;
-       }
-       perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
-
-       register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
-       printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
-       return 0;
-}
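Because both PMUs are registered via perf_pmu_register(..., -1), they receive dynamic type numbers. A userspace consumer would typically look the type up in sysfs before calling perf_event_open(); the sketch below assumes the conventional /sys/bus/event_source/devices/ibs_op/type location (not part of this diff) and opens a system-wide sampling event on CPU 0, since the PMU uses perf_invalid_context and does not support per-task events.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Read the dynamically assigned PMU type from sysfs; the path is the
 * conventional location for dynamic PMUs and is assumed here. */
static int read_pmu_type(const char *path)
{
	FILE *f = fopen(path, "r");
	int type = -1;

	if (f) {
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
	}
	return type;
}

int main(void)
{
	struct perf_event_attr attr;
	int type, fd;

	type = read_pmu_type("/sys/bus/event_source/devices/ibs_op/type");
	if (type < 0) {
		fprintf(stderr, "ibs_op PMU not found\n");
		return 1;
	}

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;			/* dynamic PMU type from sysfs */
	attr.sample_period = 100000;		/* effective period is a multiple of 16 */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_RAW;
	attr.disabled = 1;

	/* pid == -1, cpu == 0: system-wide on CPU 0; per-task events are
	 * not supported by this PMU. */
	fd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}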
-
-#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
-
-static __init int perf_event_ibs_init(void) { return 0; }
-
-#endif
-
-/* IBS - apic initialization, for perf and oprofile */
-
-static __init u32 __get_ibs_caps(void)
-{
-       u32 caps;
-       unsigned int max_level;
-
-       if (!boot_cpu_has(X86_FEATURE_IBS))
-               return 0;
-
-       /* check IBS cpuid feature flags */
-       max_level = cpuid_eax(0x80000000);
-       if (max_level < IBS_CPUID_FEATURES)
-               return IBS_CAPS_DEFAULT;
-
-       caps = cpuid_eax(IBS_CPUID_FEATURES);
-       if (!(caps & IBS_CAPS_AVAIL))
-               /* cpuid flags not valid */
-               return IBS_CAPS_DEFAULT;
-
-       return caps;
-}
-
-u32 get_ibs_caps(void)
-{
-       return ibs_caps;
-}
-
-EXPORT_SYMBOL(get_ibs_caps);
-
-static inline int get_eilvt(int offset)
-{
-       return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
-}
-
-static inline int put_eilvt(int offset)
-{
-       return !setup_APIC_eilvt(offset, 0, 0, 1);
-}
-
-/*
- * Check and reserve APIC extended interrupt LVT offset for IBS if available.
- */
-static inline int ibs_eilvt_valid(void)
-{
-       int offset;
-       u64 val;
-       int valid = 0;
-
-       preempt_disable();
-
-       rdmsrl(MSR_AMD64_IBSCTL, val);
-       offset = val & IBSCTL_LVT_OFFSET_MASK;
-
-       if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
-               pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
-                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-               goto out;
-       }
-
-       if (!get_eilvt(offset)) {
-               pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
-                      smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
-               goto out;
-       }
-
-       valid = 1;
-out:
-       preempt_enable();
-
-       return valid;
-}
-
-static int setup_ibs_ctl(int ibs_eilvt_off)
-{
-       struct pci_dev *cpu_cfg;
-       int nodes;
-       u32 value = 0;
-
-       nodes = 0;
-       cpu_cfg = NULL;
-       do {
-               cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
-                                        PCI_DEVICE_ID_AMD_10H_NB_MISC,
-                                        cpu_cfg);
-               if (!cpu_cfg)
-                       break;
-               ++nodes;
-               pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
-                                      | IBSCTL_LVT_OFFSET_VALID);
-               pci_read_config_dword(cpu_cfg, IBSCTL, &value);
-               if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
-                       pci_dev_put(cpu_cfg);
-                       printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
-                              "IBSCTL = 0x%08x\n", value);
-                       return -EINVAL;
-               }
-       } while (1);
-
-       if (!nodes) {
-               printk(KERN_DEBUG "No CPU node configured for IBS\n");
-               return -ENODEV;
-       }
-
-       return 0;
-}
-
-/*
- * This runs only on the current CPU. We try to find an LVT offset and
- * set up the local APIC. For this we must disable preemption. On
- * success we initialize all nodes with this offset, which then updates
- * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of
- * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
- * uses the new offset.
- */
-static void force_ibs_eilvt_setup(void)
-{
-       int offset;
-       int ret;
-
-       preempt_disable();
-       /* find the next available EILVT entry, skip offset 0 */
-       for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
-               if (get_eilvt(offset))
-                       break;
-       }
-       preempt_enable();
-
-       if (offset == APIC_EILVT_NR_MAX) {
-               printk(KERN_DEBUG "No EILVT entry available\n");
-               return;
-       }
-
-       ret = setup_ibs_ctl(offset);
-       if (ret)
-               goto out;
-
-       if (!ibs_eilvt_valid())
-               goto out;
-
-       pr_info("IBS: LVT offset %d assigned\n", offset);
-
-       return;
-out:
-       preempt_disable();
-       put_eilvt(offset);
-       preempt_enable();
-       return;
-}
-
-static void ibs_eilvt_setup(void)
-{
-       /*
-        * Force LVT offset assignment for family 10h: The offsets are
-        * not assigned by the BIOS for this family, so the OS is
-        * responsible for doing it. If the OS assignment fails, fall
-        * back to the BIOS settings and try to set this up.
-        */
-       if (boot_cpu_data.x86 == 0x10)
-               force_ibs_eilvt_setup();
-}
-
-static inline int get_ibs_lvt_offset(void)
-{
-       u64 val;
-
-       rdmsrl(MSR_AMD64_IBSCTL, val);
-       if (!(val & IBSCTL_LVT_OFFSET_VALID))
-               return -EINVAL;
-
-       return val & IBSCTL_LVT_OFFSET_MASK;
-}
-
-static void setup_APIC_ibs(void *dummy)
-{
-       int offset;
-
-       offset = get_ibs_lvt_offset();
-       if (offset < 0)
-               goto failed;
-
-       if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
-               return;
-failed:
-       pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
-               smp_processor_id());
-}
-
-static void clear_APIC_ibs(void *dummy)
-{
-       int offset;
-
-       offset = get_ibs_lvt_offset();
-       if (offset >= 0)
-               setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
-}
-
-#ifdef CONFIG_PM
-
-static int perf_ibs_suspend(void)
-{
-       clear_APIC_ibs(NULL);
-       return 0;
-}
-
-static void perf_ibs_resume(void)
-{
-       ibs_eilvt_setup();
-       setup_APIC_ibs(NULL);
-}
-
-static struct syscore_ops perf_ibs_syscore_ops = {
-       .resume         = perf_ibs_resume,
-       .suspend        = perf_ibs_suspend,
-};
-
-static void perf_ibs_pm_init(void)
-{
-       register_syscore_ops(&perf_ibs_syscore_ops);
-}
-
-#else
-
-static inline void perf_ibs_pm_init(void) { }
-
-#endif
-
-static int
-perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
-{
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               setup_APIC_ibs(NULL);
-               break;
-       case CPU_DYING:
-               clear_APIC_ibs(NULL);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static __init int amd_ibs_init(void)
-{
-       u32 caps;
-       int ret = -EINVAL;
-
-       caps = __get_ibs_caps();
-       if (!caps)
-               return -ENODEV; /* ibs not supported by the cpu */
-
-       ibs_eilvt_setup();
-
-       if (!ibs_eilvt_valid())
-               goto out;
-
-       perf_ibs_pm_init();
-       cpu_notifier_register_begin();
-       ibs_caps = caps;
-       /* make ibs_caps visible to other cpus: */
-       smp_mb();
-       smp_call_function(setup_APIC_ibs, NULL, 1);
-       __perf_cpu_notifier(perf_ibs_cpu_notifier);
-       cpu_notifier_register_done();
-
-       ret = perf_event_ibs_init();
-out:
-       if (ret)
-               pr_err("Failed to setup IBS, %d\n", ret);
-       return ret;
-}
-
-/* Since we need the pci subsystem to init ibs we can't do this earlier: */
-device_initcall(amd_ibs_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
deleted file mode 100644 (file)
index 97242a9..0000000
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/module.h>
-#include <linux/cpumask.h>
-#include <linux/slab.h>
-
-#include "perf_event.h"
-#include "perf_event_amd_iommu.h"
-
-#define COUNTER_SHIFT          16
-
-#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
-#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
-
-/* iommu pmu config masks */
-#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
-#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
-#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
-#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
-#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
-#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
-#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
-
-static struct perf_amd_iommu __perf_iommu;
-
-struct perf_amd_iommu {
-       struct pmu pmu;
-       u8 max_banks;
-       u8 max_counters;
-       u64 cntr_assign_mask;
-       raw_spinlock_t lock;
-       const struct attribute_group *attr_groups[4];
-};
-
-#define format_group   attr_groups[0]
-#define cpumask_group  attr_groups[1]
-#define events_group   attr_groups[2]
-#define null_group     attr_groups[3]
-
-/*---------------------------------------------
- * sysfs format attributes
- *---------------------------------------------*/
-PMU_FORMAT_ATTR(csource,    "config:0-7");
-PMU_FORMAT_ATTR(devid,      "config:8-23");
-PMU_FORMAT_ATTR(pasid,      "config:24-39");
-PMU_FORMAT_ATTR(domid,      "config:40-55");
-PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
-PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
-PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
-
-static struct attribute *iommu_format_attrs[] = {
-       &format_attr_csource.attr,
-       &format_attr_devid.attr,
-       &format_attr_pasid.attr,
-       &format_attr_domid.attr,
-       &format_attr_devid_mask.attr,
-       &format_attr_pasid_mask.attr,
-       &format_attr_domid_mask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_iommu_format_group = {
-       .name = "format",
-       .attrs = iommu_format_attrs,
-};
-
-/*---------------------------------------------
- * sysfs events attributes
- *---------------------------------------------*/
-struct amd_iommu_event_desc {
-       struct kobj_attribute attr;
-       const char *event;
-};
-
-static ssize_t _iommu_event_show(struct kobject *kobj,
-                               struct kobj_attribute *attr, char *buf)
-{
-       struct amd_iommu_event_desc *event =
-               container_of(attr, struct amd_iommu_event_desc, attr);
-       return sprintf(buf, "%s\n", event->event);
-}
-
-#define AMD_IOMMU_EVENT_DESC(_name, _event)                    \
-{                                                              \
-       .attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),  \
-       .event = _event,                                        \
-}
-
-static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
-       AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
-       AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
-       AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
-       AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
-       AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
-       AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
-       AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
-       AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
-       AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
-       AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
-       AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
-       AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
-       AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
-       AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
-       { /* end: all zeroes */ },
-};
-
-/*---------------------------------------------
- * sysfs cpumask attributes
- *---------------------------------------------*/
-static cpumask_t iommu_cpumask;
-
-static ssize_t _iommu_cpumask_show(struct device *dev,
-                                  struct device_attribute *attr,
-                                  char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
-
-static struct attribute *iommu_cpumask_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_iommu_cpumask_group = {
-       .attrs = iommu_cpumask_attrs,
-};
-
-/*---------------------------------------------*/
-
-static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
-{
-       unsigned long flags;
-       int shift, bank, cntr, retval;
-       int max_banks = perf_iommu->max_banks;
-       int max_cntrs = perf_iommu->max_counters;
-
-       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
-
-       for (bank = 0, shift = 0; bank < max_banks; bank++) {
-               for (cntr = 0; cntr < max_cntrs; cntr++) {
-                       shift = bank + (bank*3) + cntr;
-                       if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
-                               continue;
-                       } else {
-                               perf_iommu->cntr_assign_mask |= (1ULL<<shift);
-                               retval = ((u16)((u16)bank<<8) | (u8)(cntr));
-                               goto out;
-                       }
-               }
-       }
-       retval = -ENOSPC;
-out:
-       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
-       return retval;
-}
-
-static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
-                                       u8 bank, u8 cntr)
-{
-       unsigned long flags;
-       int max_banks, max_cntrs;
-       int shift = 0;
-
-       max_banks = perf_iommu->max_banks;
-       max_cntrs = perf_iommu->max_counters;
-
-       if ((bank > max_banks) || (cntr > max_cntrs))
-               return -EINVAL;
-
-       shift = bank + cntr + (bank*3);
-
-       raw_spin_lock_irqsave(&perf_iommu->lock, flags);
-       perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
-       raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
-
-       return 0;
-}
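The allocator above packs the chosen bank and counter into extra_reg.reg as (bank << 8) | cntr and tracks usage in cntr_assign_mask at bit bank + bank*3 + cntr, i.e. bank*4 + cntr, four counter slots per bank. A minimal arithmetic-only illustration follows; the example bank and counter values are made up.

#include <stdint.h>
#include <stdio.h>

/* Standalone demo of the bank/counter bookkeeping; no IOMMU access. */
static unsigned int pack_bnk_cntr(uint8_t bank, uint8_t cntr)
{
	return ((unsigned int)bank << 8) | cntr;	/* stored in extra_reg.reg */
}

static unsigned int mask_bit(uint8_t bank, uint8_t cntr)
{
	return bank + (bank * 3) + cntr;		/* == bank * 4 + cntr */
}

int main(void)
{
	uint64_t assign_mask = 0;
	uint8_t bank = 2, cntr = 1;

	assign_mask |= 1ULL << mask_bit(bank, cntr);
	printf("extra_reg.reg = 0x%x, mask bit %u, mask = 0x%llx\n",
	       pack_bnk_cntr(bank, cntr), mask_bit(bank, cntr),
	       (unsigned long long)assign_mask);
	return 0;
}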
-
-static int perf_iommu_event_init(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct perf_amd_iommu *perf_iommu;
-       u64 config, config1;
-
-       /* check the event attr type for PMU enumeration */
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /*
-        * IOMMU counters are shared across all cores.
-        * Therefore, they do not support per-process mode.
-        * They also do not support event sampling mode.
-        */
-       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-               return -EINVAL;
-
-       /* IOMMU counters do not have usr/os/guest/host bits */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-           event->attr.exclude_host || event->attr.exclude_guest)
-               return -EINVAL;
-
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       perf_iommu = &__perf_iommu;
-
-       if (event->pmu != &perf_iommu->pmu)
-               return -ENOENT;
-
-       if (perf_iommu) {
-               config = event->attr.config;
-               config1 = event->attr.config1;
-       } else {
-               return -EINVAL;
-       }
-
-       /* integrate with iommu base devid (0000), assume one iommu */
-       perf_iommu->max_banks =
-               amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
-       perf_iommu->max_counters =
-               amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
-       if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
-               return -EINVAL;
-
-       /* update the hw_perf_event struct with the iommu config data */
-       hwc->config = config;
-       hwc->extra_reg.config = config1;
-
-       return 0;
-}
-
-static void perf_iommu_enable_event(struct perf_event *ev)
-{
-       u8 csource = _GET_CSOURCE(ev);
-       u16 devid = _GET_DEVID(ev);
-       u64 reg = 0ULL;
-
-       reg = csource;
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev) ,
-                        IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-
-       reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev) ,
-                        IOMMU_PC_DEVID_MATCH_REG, &reg, true);
-
-       reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev) ,
-                        IOMMU_PC_PASID_MATCH_REG, &reg, true);
-
-       reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
-       if (reg)
-               reg |= (1UL << 31);
-       amd_iommu_pc_get_set_reg_val(devid,
-                       _GET_BANK(ev), _GET_CNTR(ev) ,
-                        IOMMU_PC_DOMID_MATCH_REG, &reg, true);
-}
-
-static void perf_iommu_disable_event(struct perf_event *event)
-{
-       u64 reg = 0ULL;
-
-       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                       _GET_BANK(event), _GET_CNTR(event),
-                       IOMMU_PC_COUNTER_SRC_REG, &reg, true);
-}
-
-static void perf_iommu_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       pr_debug("perf: amd_iommu:perf_iommu_start\n");
-       if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
-               return;
-
-       WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-       hwc->state = 0;
-
-       if (flags & PERF_EF_RELOAD) {
-               u64 prev_raw_count =  local64_read(&hwc->prev_count);
-               amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                               _GET_BANK(event), _GET_CNTR(event),
-                               IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
-       }
-
-       perf_iommu_enable_event(event);
-       perf_event_update_userpage(event);
-
-}
-
-static void perf_iommu_read(struct perf_event *event)
-{
-       u64 count = 0ULL;
-       u64 prev_raw_count = 0ULL;
-       u64 delta = 0ULL;
-       struct hw_perf_event *hwc = &event->hw;
-       pr_debug("perf: amd_iommu:perf_iommu_read\n");
-
-       amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
-                               _GET_BANK(event), _GET_CNTR(event),
-                               IOMMU_PC_COUNTER_REG, &count, false);
-
-       /* IOMMU pc counter register is only 48 bits */
-       count &= 0xFFFFFFFFFFFFULL;
-
-       prev_raw_count =  local64_read(&hwc->prev_count);
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       count) != prev_raw_count)
-               return;
-
-       /* Handle 48-bit counter overflow */
-       delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
-       delta >>= COUNTER_SHIFT;
-       local64_add(delta, &event->count);
-
-}
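perf_iommu_read() handles wrap-around of the 48-bit hardware counter by shifting both snapshots up by COUNTER_SHIFT before subtracting, so the borrow from a wrap cancels out and the delta is correct modulo 2^48. A self-contained demonstration of that arithmetic, with made-up example values and no IOMMU access:

#include <stdint.h>
#include <stdio.h>

#define COUNTER_SHIFT 16	/* 64 - 48: unused high bits of the counter */

/* Shifting both values into the top of the 64-bit word makes the
 * subtraction wrap naturally, yielding the delta modulo 2^48. */
static uint64_t delta48(uint64_t prev, uint64_t now)
{
	uint64_t delta = (now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);

	return delta >> COUNTER_SHIFT;
}

int main(void)
{
	uint64_t prev = 0xFFFFFFFFFFF0ULL;	/* near the 48-bit limit */
	uint64_t now  = 0x000000000010ULL;	/* counter has wrapped */

	printf("delta = %llu\n", (unsigned long long)delta48(prev, now));
	return 0;
}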
-
-static void perf_iommu_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 config;
-
-       pr_debug("perf: amd_iommu:perf_iommu_stop\n");
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       perf_iommu_disable_event(event);
-       WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-       hwc->state |= PERF_HES_STOPPED;
-
-       if (hwc->state & PERF_HES_UPTODATE)
-               return;
-
-       config = hwc->config;
-       perf_iommu_read(event);
-       hwc->state |= PERF_HES_UPTODATE;
-}
-
-static int perf_iommu_add(struct perf_event *event, int flags)
-{
-       int retval;
-       struct perf_amd_iommu *perf_iommu =
-                       container_of(event->pmu, struct perf_amd_iommu, pmu);
-
-       pr_debug("perf: amd_iommu:perf_iommu_add\n");
-       event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       /* request an iommu bank/counter */
-       retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
-       if (retval != -ENOSPC)
-               event->hw.extra_reg.reg = (u16)retval;
-       else
-               return retval;
-
-       if (flags & PERF_EF_START)
-               perf_iommu_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void perf_iommu_del(struct perf_event *event, int flags)
-{
-       struct perf_amd_iommu *perf_iommu =
-                       container_of(event->pmu, struct perf_amd_iommu, pmu);
-
-       pr_debug("perf: amd_iommu:perf_iommu_del\n");
-       perf_iommu_stop(event, PERF_EF_UPDATE);
-
-       /* clear the assigned iommu bank/counter */
-       clear_avail_iommu_bnk_cntr(perf_iommu,
-                                    _GET_BANK(event),
-                                    _GET_CNTR(event));
-
-       perf_event_update_userpage(event);
-}
-
-static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
-{
-       struct attribute **attrs;
-       struct attribute_group *attr_group;
-       int i = 0, j;
-
-       while (amd_iommu_v2_event_descs[i].attr.attr.name)
-               i++;
-
-       attr_group = kzalloc(sizeof(struct attribute *)
-               * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
-       if (!attr_group)
-               return -ENOMEM;
-
-       attrs = (struct attribute **)(attr_group + 1);
-       for (j = 0; j < i; j++)
-               attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
-
-       attr_group->name = "events";
-       attr_group->attrs = attrs;
-       perf_iommu->events_group = attr_group;
-
-       return 0;
-}
-
-static __init void amd_iommu_pc_exit(void)
-{
-       if (__perf_iommu.events_group != NULL) {
-               kfree(__perf_iommu.events_group);
-               __perf_iommu.events_group = NULL;
-       }
-}
-
-static __init int _init_perf_amd_iommu(
-       struct perf_amd_iommu *perf_iommu, char *name)
-{
-       int ret;
-
-       raw_spin_lock_init(&perf_iommu->lock);
-
-       /* Init format attributes */
-       perf_iommu->format_group = &amd_iommu_format_group;
-
-       /* Init cpumask attributes to only core 0 */
-       cpumask_set_cpu(0, &iommu_cpumask);
-       perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
-
-       /* Init events attributes */
-       if (_init_events_attrs(perf_iommu) != 0)
-               pr_err("perf: amd_iommu: Only raw events are supported.\n");
-
-       /* Init null attributes */
-       perf_iommu->null_group = NULL;
-       perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
-
-       ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
-       if (ret) {
-               pr_err("perf: amd_iommu: Failed to initialize.\n");
-               amd_iommu_pc_exit();
-       } else {
-               pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
-                       amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
-                       amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
-       }
-
-       return ret;
-}
-
-static struct perf_amd_iommu __perf_iommu = {
-       .pmu = {
-               .event_init     = perf_iommu_event_init,
-               .add            = perf_iommu_add,
-               .del            = perf_iommu_del,
-               .start          = perf_iommu_start,
-               .stop           = perf_iommu_stop,
-               .read           = perf_iommu_read,
-       },
-       .max_banks              = 0x00,
-       .max_counters           = 0x00,
-       .cntr_assign_mask       = 0ULL,
-       .format_group           = NULL,
-       .cpumask_group          = NULL,
-       .events_group           = NULL,
-       .null_group             = NULL,
-};
-
-static __init int amd_iommu_pc_init(void)
-{
-       /* Make sure the IOMMU PC resource is available */
-       if (!amd_iommu_pc_supported())
-               return -ENODEV;
-
-       _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
-
-       return 0;
-}
-
-device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
deleted file mode 100644 (file)
index 845d173..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Steven Kinney <Steven.Kinney@amd.com>
- * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef _PERF_EVENT_AMD_IOMMU_H_
-#define _PERF_EVENT_AMD_IOMMU_H_
-
-/* iommu pc mmio region register indexes */
-#define IOMMU_PC_COUNTER_REG                   0x00
-#define IOMMU_PC_COUNTER_SRC_REG               0x08
-#define IOMMU_PC_PASID_MATCH_REG               0x10
-#define IOMMU_PC_DOMID_MATCH_REG               0x18
-#define IOMMU_PC_DEVID_MATCH_REG               0x20
-#define IOMMU_PC_COUNTER_REPORT_REG            0x28
-
-/* maximum specified bank/counters */
-#define PC_MAX_SPEC_BNKS                       64
-#define PC_MAX_SPEC_CNTRS                      16
-
-/* iommu pc reg masks */
-#define IOMMU_BASE_DEVID                       0x0000
-
-/* amd_iommu_init.c external support functions */
-extern bool amd_iommu_pc_supported(void);
-
-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
-
-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
-
-extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
-                       u8 fxn, u64 *value, bool is_write);
-
-#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
deleted file mode 100644 (file)
index 8836fc9..0000000
+++ /dev/null
@@ -1,603 +0,0 @@
-/*
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
- *
- * Author: Jacob Shin <jacob.shin@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/perf_event.h>
-#include <linux/percpu.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-
-#include <asm/cpufeature.h>
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-
-#define NUM_COUNTERS_NB                4
-#define NUM_COUNTERS_L2                4
-#define MAX_COUNTERS           NUM_COUNTERS_NB
-
-#define RDPMC_BASE_NB          6
-#define RDPMC_BASE_L2          10
-
-#define COUNTER_SHIFT          16
-
-struct amd_uncore {
-       int id;
-       int refcnt;
-       int cpu;
-       int num_counters;
-       int rdpmc_base;
-       u32 msr_base;
-       cpumask_t *active_mask;
-       struct pmu *pmu;
-       struct perf_event *events[MAX_COUNTERS];
-       struct amd_uncore *free_when_cpu_online;
-};
-
-static struct amd_uncore * __percpu *amd_uncore_nb;
-static struct amd_uncore * __percpu *amd_uncore_l2;
-
-static struct pmu amd_nb_pmu;
-static struct pmu amd_l2_pmu;
-
-static cpumask_t amd_nb_active_mask;
-static cpumask_t amd_l2_active_mask;
-
-static bool is_nb_event(struct perf_event *event)
-{
-       return event->pmu->type == amd_nb_pmu.type;
-}
-
-static bool is_l2_event(struct perf_event *event)
-{
-       return event->pmu->type == amd_l2_pmu.type;
-}
-
-static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
-{
-       if (is_nb_event(event) && amd_uncore_nb)
-               return *per_cpu_ptr(amd_uncore_nb, event->cpu);
-       else if (is_l2_event(event) && amd_uncore_l2)
-               return *per_cpu_ptr(amd_uncore_l2, event->cpu);
-
-       return NULL;
-}
-
-static void amd_uncore_read(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev, new;
-       s64 delta;
-
-       /*
-        * since we do not enable counter overflow interrupts,
-        * we do not have to worry about prev_count changing on us
-        */
-
-       prev = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new);
-       local64_set(&hwc->prev_count, new);
-       delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-       delta >>= COUNTER_SHIFT;
-       local64_add(delta, &event->count);
-}
-
-static void amd_uncore_start(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (flags & PERF_EF_RELOAD)
-               wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
-
-       hwc->state = 0;
-       wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
-       perf_event_update_userpage(event);
-}
-
-static void amd_uncore_stop(struct perf_event *event, int flags)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-       hwc->state |= PERF_HES_STOPPED;
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               amd_uncore_read(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int amd_uncore_add(struct perf_event *event, int flags)
-{
-       int i;
-       struct amd_uncore *uncore = event_to_amd_uncore(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       /* are we already assigned? */
-       if (hwc->idx != -1 && uncore->events[hwc->idx] == event)
-               goto out;
-
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (uncore->events[i] == event) {
-                       hwc->idx = i;
-                       goto out;
-               }
-       }
-
-       /* if not, take the first available counter */
-       hwc->idx = -1;
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (cmpxchg(&uncore->events[i], NULL, event) == NULL) {
-                       hwc->idx = i;
-                       break;
-               }
-       }
-
-out:
-       if (hwc->idx == -1)
-               return -EBUSY;
-
-       hwc->config_base = uncore->msr_base + (2 * hwc->idx);
-       hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx);
-       hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx;
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       if (flags & PERF_EF_START)
-               amd_uncore_start(event, PERF_EF_RELOAD);
-
-       return 0;
-}
-
-static void amd_uncore_del(struct perf_event *event, int flags)
-{
-       int i;
-       struct amd_uncore *uncore = event_to_amd_uncore(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       amd_uncore_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < uncore->num_counters; i++) {
-               if (cmpxchg(&uncore->events[i], event, NULL) == event)
-                       break;
-       }
-
-       hwc->idx = -1;
-}
-
-static int amd_uncore_event_init(struct perf_event *event)
-{
-       struct amd_uncore *uncore;
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /*
-        * NB and L2 counters (MSRs) are shared across all cores that share the
-        * same NB / L2 cache. Interrupts can be directed to a single target
-        * core; however, event counts generated by processes running on other
-        * cores cannot be masked out. So we do not support sampling and
-        * per-thread events.
-        */
-       if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
-               return -EINVAL;
-
-       /* NB and L2 counters do not have usr/os/guest/host bits */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-           event->attr.exclude_host || event->attr.exclude_guest)
-               return -EINVAL;
-
-       /* and we do not enable counter overflow interrupts */
-       hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
-       hwc->idx = -1;
-
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       uncore = event_to_amd_uncore(event);
-       if (!uncore)
-               return -ENODEV;
-
-       /*
-        * since requests can come in on any of the shared cores, we remap
-        * them to a single common cpu.
-        */
-       event->cpu = uncore->cpu;
-
-       return 0;
-}
-
-static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,
-                                           struct device_attribute *attr,
-                                           char *buf)
-{
-       cpumask_t *active_mask;
-       struct pmu *pmu = dev_get_drvdata(dev);
-
-       if (pmu->type == amd_nb_pmu.type)
-               active_mask = &amd_nb_active_mask;
-       else if (pmu->type == amd_l2_pmu.type)
-               active_mask = &amd_l2_active_mask;
-       else
-               return 0;
-
-       return cpumap_print_to_pagebuf(true, buf, active_mask);
-}
-static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL);
-
-static struct attribute *amd_uncore_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_uncore_attr_group = {
-       .attrs = amd_uncore_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7,32-35");
-PMU_FORMAT_ATTR(umask, "config:8-15");
-
-static struct attribute *amd_uncore_format_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       NULL,
-};
-
-static struct attribute_group amd_uncore_format_group = {
-       .name = "format",
-       .attrs = amd_uncore_format_attr,
-};
-
-static const struct attribute_group *amd_uncore_attr_groups[] = {
-       &amd_uncore_attr_group,
-       &amd_uncore_format_group,
-       NULL,
-};
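Given the format strings above (event select in config bits 0-7 and 32-35, umask in bits 8-15), a raw perf_event_attr.config value for the amd_nb or amd_l2 PMU would be assembled as in the following sketch; the event code and umask used here are arbitrary example values, not taken from this diff.

#include <stdint.h>
#include <stdio.h>

/* Pack a 12-bit event select and an 8-bit umask into the layout declared
 * by the format attributes above. */
static uint64_t amd_uncore_config(uint16_t event, uint8_t umask)
{
	uint64_t config = 0;

	config |= (uint64_t)(event & 0xff);		/* config:0-7   */
	config |= (uint64_t)umask << 8;			/* config:8-15  */
	config |= (uint64_t)((event >> 8) & 0xf) << 32;	/* config:32-35 */
	return config;
}

int main(void)
{
	/* example encoding only; not a documented event code */
	printf("config = 0x%llx\n",
	       (unsigned long long)amd_uncore_config(0x0e0, 0x07));
	return 0;
}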
-
-static struct pmu amd_nb_pmu = {
-       .attr_groups    = amd_uncore_attr_groups,
-       .name           = "amd_nb",
-       .event_init     = amd_uncore_event_init,
-       .add            = amd_uncore_add,
-       .del            = amd_uncore_del,
-       .start          = amd_uncore_start,
-       .stop           = amd_uncore_stop,
-       .read           = amd_uncore_read,
-};
-
-static struct pmu amd_l2_pmu = {
-       .attr_groups    = amd_uncore_attr_groups,
-       .name           = "amd_l2",
-       .event_init     = amd_uncore_event_init,
-       .add            = amd_uncore_add,
-       .del            = amd_uncore_del,
-       .start          = amd_uncore_start,
-       .stop           = amd_uncore_stop,
-       .read           = amd_uncore_read,
-};
-
-static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
-{
-       return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
-                       cpu_to_node(cpu));
-}
-
-static int amd_uncore_cpu_up_prepare(unsigned int cpu)
-{
-       struct amd_uncore *uncore_nb = NULL, *uncore_l2;
-
-       if (amd_uncore_nb) {
-               uncore_nb = amd_uncore_alloc(cpu);
-               if (!uncore_nb)
-                       goto fail;
-               uncore_nb->cpu = cpu;
-               uncore_nb->num_counters = NUM_COUNTERS_NB;
-               uncore_nb->rdpmc_base = RDPMC_BASE_NB;
-               uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
-               uncore_nb->active_mask = &amd_nb_active_mask;
-               uncore_nb->pmu = &amd_nb_pmu;
-               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
-       }
-
-       if (amd_uncore_l2) {
-               uncore_l2 = amd_uncore_alloc(cpu);
-               if (!uncore_l2)
-                       goto fail;
-               uncore_l2->cpu = cpu;
-               uncore_l2->num_counters = NUM_COUNTERS_L2;
-               uncore_l2->rdpmc_base = RDPMC_BASE_L2;
-               uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
-               uncore_l2->active_mask = &amd_l2_active_mask;
-               uncore_l2->pmu = &amd_l2_pmu;
-               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
-       }
-
-       return 0;
-
-fail:
-       if (amd_uncore_nb)
-               *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
-       kfree(uncore_nb);
-       return -ENOMEM;
-}
-
-static struct amd_uncore *
-amd_uncore_find_online_sibling(struct amd_uncore *this,
-                              struct amd_uncore * __percpu *uncores)
-{
-       unsigned int cpu;
-       struct amd_uncore *that;
-
-       for_each_online_cpu(cpu) {
-               that = *per_cpu_ptr(uncores, cpu);
-
-               if (!that)
-                       continue;
-
-               if (this == that)
-                       continue;
-
-               if (this->id == that->id) {
-                       that->free_when_cpu_online = this;
-                       this = that;
-                       break;
-               }
-       }
-
-       this->refcnt++;
-       return this;
-}
-
-static void amd_uncore_cpu_starting(unsigned int cpu)
-{
-       unsigned int eax, ebx, ecx, edx;
-       struct amd_uncore *uncore;
-
-       if (amd_uncore_nb) {
-               uncore = *per_cpu_ptr(amd_uncore_nb, cpu);
-               cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
-               uncore->id = ecx & 0xff;
-
-               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb);
-               *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
-       }
-
-       if (amd_uncore_l2) {
-               unsigned int apicid = cpu_data(cpu).apicid;
-               unsigned int nshared;
-
-               uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
-               cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
-               nshared = ((eax >> 14) & 0xfff) + 1;
-               uncore->id = apicid - (apicid % nshared);
-
-               uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
-               *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
-       }
-}
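For the L2 PMU, the code above derives a sharing id from the APIC ID and the number of cores sharing the cache (CPUID leaf 0x8000001d, subleaf 2, EAX bits 25:14 plus one), grouping cores as apicid - (apicid % nshared). A standalone arithmetic sketch with made-up values:

#include <stdio.h>

/* Cores whose APIC IDs fall into the same block of nshared consecutive
 * IDs share one L2 and therefore one uncore id. */
static unsigned int l2_group_id(unsigned int apicid, unsigned int nshared)
{
	return apicid - (apicid % nshared);
}

int main(void)
{
	unsigned int nshared = 2;	/* e.g. two SMT siblings per L2 */
	unsigned int cpu;

	for (cpu = 0; cpu < 8; cpu++)
		printf("apicid %u -> l2 id %u\n", cpu, l2_group_id(cpu, nshared));
	return 0;
}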
-
-static void uncore_online(unsigned int cpu,
-                         struct amd_uncore * __percpu *uncores)
-{
-       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
-       kfree(uncore->free_when_cpu_online);
-       uncore->free_when_cpu_online = NULL;
-
-       if (cpu == uncore->cpu)
-               cpumask_set_cpu(cpu, uncore->active_mask);
-}
-
-static void amd_uncore_cpu_online(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_online(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_online(cpu, amd_uncore_l2);
-}
-
-static void uncore_down_prepare(unsigned int cpu,
-                               struct amd_uncore * __percpu *uncores)
-{
-       unsigned int i;
-       struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
-
-       if (this->cpu != cpu)
-               return;
-
-       /* this cpu is going down, migrate to a shared sibling if possible */
-       for_each_online_cpu(i) {
-               struct amd_uncore *that = *per_cpu_ptr(uncores, i);
-
-               if (cpu == i)
-                       continue;
-
-               if (this == that) {
-                       perf_pmu_migrate_context(this->pmu, cpu, i);
-                       cpumask_clear_cpu(cpu, that->active_mask);
-                       cpumask_set_cpu(i, that->active_mask);
-                       that->cpu = i;
-                       break;
-               }
-       }
-}
-
-static void amd_uncore_cpu_down_prepare(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_down_prepare(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_down_prepare(cpu, amd_uncore_l2);
-}
-
-static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
-{
-       struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
-
-       if (cpu == uncore->cpu)
-               cpumask_clear_cpu(cpu, uncore->active_mask);
-
-       if (!--uncore->refcnt)
-               kfree(uncore);
-       *per_cpu_ptr(uncores, cpu) = NULL;
-}
-
-static void amd_uncore_cpu_dead(unsigned int cpu)
-{
-       if (amd_uncore_nb)
-               uncore_dead(cpu, amd_uncore_nb);
-
-       if (amd_uncore_l2)
-               uncore_dead(cpu, amd_uncore_l2);
-}
-
-static int
-amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
-                       void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               if (amd_uncore_cpu_up_prepare(cpu))
-                       return notifier_from_errno(-ENOMEM);
-               break;
-
-       case CPU_STARTING:
-               amd_uncore_cpu_starting(cpu);
-               break;
-
-       case CPU_ONLINE:
-               amd_uncore_cpu_online(cpu);
-               break;
-
-       case CPU_DOWN_PREPARE:
-               amd_uncore_cpu_down_prepare(cpu);
-               break;
-
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               amd_uncore_cpu_dead(cpu);
-               break;
-
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block amd_uncore_cpu_notifier_block = {
-       .notifier_call  = amd_uncore_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
-};
-
-static void __init init_cpu_already_online(void *dummy)
-{
-       unsigned int cpu = smp_processor_id();
-
-       amd_uncore_cpu_starting(cpu);
-       amd_uncore_cpu_online(cpu);
-}
-
-static void cleanup_cpu_online(void *dummy)
-{
-       unsigned int cpu = smp_processor_id();
-
-       amd_uncore_cpu_dead(cpu);
-}
-
-static int __init amd_uncore_init(void)
-{
-       unsigned int cpu, cpu2;
-       int ret = -ENODEV;
-
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
-               goto fail_nodev;
-
-       if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
-               goto fail_nodev;
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
-               amd_uncore_nb = alloc_percpu(struct amd_uncore *);
-               if (!amd_uncore_nb) {
-                       ret = -ENOMEM;
-                       goto fail_nb;
-               }
-               ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
-               if (ret)
-                       goto fail_nb;
-
-               printk(KERN_INFO "perf: AMD NB counters detected\n");
-               ret = 0;
-       }
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
-               amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
-               if (!amd_uncore_l2) {
-                       ret = -ENOMEM;
-                       goto fail_l2;
-               }
-               ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
-               if (ret)
-                       goto fail_l2;
-
-               printk(KERN_INFO "perf: AMD L2I counters detected\n");
-               ret = 0;
-       }
-
-       if (ret)
-               goto fail_nodev;
-
-       cpu_notifier_register_begin();
-
-       /* init cpus already online before registering for hotplug notifier */
-       for_each_online_cpu(cpu) {
-               ret = amd_uncore_cpu_up_prepare(cpu);
-               if (ret)
-                       goto fail_online;
-               smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
-       }
-
-       __register_cpu_notifier(&amd_uncore_cpu_notifier_block);
-       cpu_notifier_register_done();
-
-       return 0;
-
-
-fail_online:
-       for_each_online_cpu(cpu2) {
-               if (cpu2 == cpu)
-                       break;
-               smp_call_function_single(cpu, cleanup_cpu_online, NULL, 1);
-       }
-       cpu_notifier_register_done();
-
-       /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
-       amd_uncore_nb = amd_uncore_l2 = NULL;
-
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_L2))
-               perf_pmu_unregister(&amd_l2_pmu);
-fail_l2:
-       if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
-               perf_pmu_unregister(&amd_nb_pmu);
-       if (amd_uncore_l2)
-               free_percpu(amd_uncore_l2);
-fail_nb:
-       if (amd_uncore_nb)
-               free_percpu(amd_uncore_nb);
-
-fail_nodev:
-       return ret;
-}
-device_initcall(amd_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
deleted file mode 100644 (file)
index fed2ab1..0000000
+++ /dev/null
@@ -1,3773 +0,0 @@
-/*
- * Per core/cpu state
- *
- * Used to coordinate shared registers between HT threads or
- * among events on a single PMU.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/stddef.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/nmi.h>
-
-#include <asm/cpufeature.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-/*
- * Intel PerfMon, used on Core and later.
- */
-static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
-{
-       [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
-       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
-       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
-       [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
-       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
-       [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
-       [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
-};
-
-static struct event_constraint intel_core_event_constraints[] __read_mostly =
-{
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-       INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_core2_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
-       INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
-       INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
-       INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
-       INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
-       INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
-       INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
-       INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
-       INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
-       INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
-       INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
-       EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-       INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
-       INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
-       INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_snb_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
-       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
-       INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
-       INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-       INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
-       EVENT_EXTRA_END
-};
-
-static struct event_constraint intel_v1_event_constraints[] __read_mostly =
-{
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_gen_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint intel_slm_event_constraints[] __read_mostly =
-{
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7,
-                              MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7,
-                              MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       EVENT_EXTRA_END
-};
-
-static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       /*
-        * Note: the low 8 bits of the eventsel code are not a contiguous field;
-        * some bits cause a #GP when set, so they are masked out here.
-        */
-       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
-       EVENT_EXTRA_END
-};
-
-EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
-
-struct attribute *nhm_events_attrs[] = {
-       EVENT_PTR(mem_ld_nhm),
-       NULL,
-};
-
-struct attribute *snb_events_attrs[] = {
-       EVENT_PTR(mem_ld_snb),
-       EVENT_PTR(mem_st_snb),
-       NULL,
-};
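The mem_ld_snb/mem_st_snb strings above describe events in the PMU's sysfs event format. As a minimal standalone sketch (not kernel code; it assumes the conventional x86 core-PMU layout of the event select in config bits 0-7 and the umask in bits 8-15), this shows how "event=0xcd,umask=0x1" folds into the same 0x01cd selector used by the PEBS load-latency extra-register entries in the tables above:

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: event select in config bits 0-7, umask in bits 8-15. */
static uint64_t encode_config(uint64_t event, uint64_t umask)
{
        return (event & 0xff) | ((umask & 0xff) << 8);
}

int main(void)
{
        /* mem-loads: event=0xcd, umask=0x1 -> 0x1cd */
        printf("mem-loads  config = 0x%llx\n",
               (unsigned long long)encode_config(0xcd, 0x1));
        /* mem-stores: event=0xcd, umask=0x2 -> 0x2cd */
        printf("mem-stores config = 0x%llx\n",
               (unsigned long long)encode_config(0xcd, 0x2));
        return 0;
}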
-
-static struct event_constraint intel_hsw_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
-       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
-       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
-       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
-
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_bdw_event_constraints[] = {
-       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
-       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
-       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
-       INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
-       INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
-       EVENT_CONSTRAINT_END
-};
-
-static u64 intel_pmu_event_map(int hw_event)
-{
-       return intel_perfmon_event_map[hw_event];
-}
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts.
- * - icache miss does not include decoded icache
- */
-
-#define SKL_DEMAND_DATA_RD             BIT_ULL(0)
-#define SKL_DEMAND_RFO                 BIT_ULL(1)
-#define SKL_ANY_RESPONSE               BIT_ULL(16)
-#define SKL_SUPPLIER_NONE              BIT_ULL(17)
-#define SKL_L3_MISS_LOCAL_DRAM         BIT_ULL(26)
-#define SKL_L3_MISS_REMOTE_HOP0_DRAM   BIT_ULL(27)
-#define SKL_L3_MISS_REMOTE_HOP1_DRAM   BIT_ULL(28)
-#define SKL_L3_MISS_REMOTE_HOP2P_DRAM  BIT_ULL(29)
-#define SKL_L3_MISS                    (SKL_L3_MISS_LOCAL_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP0_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-#define SKL_SPL_HIT                    BIT_ULL(30)
-#define SKL_SNOOP_NONE                 BIT_ULL(31)
-#define SKL_SNOOP_NOT_NEEDED           BIT_ULL(32)
-#define SKL_SNOOP_MISS                 BIT_ULL(33)
-#define SKL_SNOOP_HIT_NO_FWD           BIT_ULL(34)
-#define SKL_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
-#define SKL_SNOOP_HITM                 BIT_ULL(36)
-#define SKL_SNOOP_NON_DRAM             BIT_ULL(37)
-#define SKL_ANY_SNOOP                  (SKL_SPL_HIT|SKL_SNOOP_NONE| \
-                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
-                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
-                                        SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
-#define SKL_DEMAND_READ                        SKL_DEMAND_DATA_RD
-#define SKL_SNOOP_DRAM                 (SKL_SNOOP_NONE| \
-                                        SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
-                                        SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
-                                        SKL_SNOOP_HITM|SKL_SPL_HIT)
-#define SKL_DEMAND_WRITE               SKL_DEMAND_RFO
-#define SKL_LLC_ACCESS                 SKL_ANY_RESPONSE
-#define SKL_L3_MISS_REMOTE             (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP1_DRAM| \
-                                        SKL_L3_MISS_REMOTE_HOP2P_DRAM)
-
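As a rough illustration of how the SKL_* offcore-response masks above compose, here is a standalone sketch (not kernel code; BIT_ULL is redefined locally) that reproduces SKL_L3_MISS as the OR of the four DRAM-miss sources and SKL_L3_MISS_REMOTE as the same mask without the local-DRAM bit:

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)      (1ULL << (n))

int main(void)
{
        uint64_t l3_miss_local = BIT_ULL(26);
        uint64_t l3_miss_hop0  = BIT_ULL(27);
        uint64_t l3_miss_hop1  = BIT_ULL(28);
        uint64_t l3_miss_hop2p = BIT_ULL(29);

        /* SKL_L3_MISS: any of the four DRAM-miss sources. */
        uint64_t skl_l3_miss = l3_miss_local | l3_miss_hop0 |
                               l3_miss_hop1 | l3_miss_hop2p;
        /* SKL_L3_MISS_REMOTE: the same set minus local DRAM. */
        uint64_t skl_l3_miss_remote = skl_l3_miss & ~l3_miss_local;

        printf("SKL_L3_MISS        = 0x%016llx\n",
               (unsigned long long)skl_l3_miss);
        printf("SKL_L3_MISS_REMOTE = 0x%016llx\n",
               (unsigned long long)skl_l3_miss_remote);
        return 0;
}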
-static __initconst const u64 skl_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
-               [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
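The table above (and the other *_hw_cache_event_ids tables that follow) is indexed by cache level, operation and result to yield a raw event code, with 0 generally standing for "not counted/supported" and -1 for an invalid combination. A minimal standalone sketch of that lookup shape, using local enums and a tiny excerpt of the Skylake values, purely for illustration:

#include <stdio.h>
#include <stdint.h>

enum cache  { L1D, L1I, CACHE_MAX };
enum op     { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum result { RESULT_ACCESS, RESULT_MISS, RESULT_MAX };

/* Tiny excerpt of the Skylake table above: 0 = not counted, -1 = invalid. */
static const int64_t ids[CACHE_MAX][OP_MAX][RESULT_MAX] = {
        [L1D] = {
                [OP_READ]  = { [RESULT_ACCESS] = 0x81d0, [RESULT_MISS] = 0x151 },
                [OP_WRITE] = { [RESULT_ACCESS] = 0x82d0, [RESULT_MISS] = 0x0   },
        },
        [L1I] = {
                [OP_READ]  = { [RESULT_ACCESS] = 0x0, [RESULT_MISS] = 0x283 },
                [OP_WRITE] = { [RESULT_ACCESS] = -1,  [RESULT_MISS] = -1    },
        },
};

int main(void)
{
        printf("L1D read miss  -> 0x%llx (L1D.REPLACEMENT)\n",
               (unsigned long long)ids[L1D][OP_READ][RESULT_MISS]);
        printf("L1I write miss -> %lld (invalid combination)\n",
               (long long)ids[L1I][OP_WRITE][RESULT_MISS]);
        return 0;
}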
-
-static __initconst const u64 skl_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
-                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS|SKL_ANY_SNOOP|
-                                      SKL_SUPPLIER_NONE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
-                                      SKL_LLC_ACCESS|SKL_ANY_SNOOP,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS|SKL_ANY_SNOOP|
-                                      SKL_SUPPLIER_NONE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
-                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
-                                      SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
-
-#define SNB_DMND_DATA_RD       (1ULL << 0)
-#define SNB_DMND_RFO           (1ULL << 1)
-#define SNB_DMND_IFETCH                (1ULL << 2)
-#define SNB_DMND_WB            (1ULL << 3)
-#define SNB_PF_DATA_RD         (1ULL << 4)
-#define SNB_PF_RFO             (1ULL << 5)
-#define SNB_PF_IFETCH          (1ULL << 6)
-#define SNB_LLC_DATA_RD                (1ULL << 7)
-#define SNB_LLC_RFO            (1ULL << 8)
-#define SNB_LLC_IFETCH         (1ULL << 9)
-#define SNB_BUS_LOCKS          (1ULL << 10)
-#define SNB_STRM_ST            (1ULL << 11)
-#define SNB_OTHER              (1ULL << 15)
-#define SNB_RESP_ANY           (1ULL << 16)
-#define SNB_NO_SUPP            (1ULL << 17)
-#define SNB_LLC_HITM           (1ULL << 18)
-#define SNB_LLC_HITE           (1ULL << 19)
-#define SNB_LLC_HITS           (1ULL << 20)
-#define SNB_LLC_HITF           (1ULL << 21)
-#define SNB_LOCAL              (1ULL << 22)
-#define SNB_REMOTE             (0xffULL << 23)
-#define SNB_SNP_NONE           (1ULL << 31)
-#define SNB_SNP_NOT_NEEDED     (1ULL << 32)
-#define SNB_SNP_MISS           (1ULL << 33)
-#define SNB_NO_FWD             (1ULL << 34)
-#define SNB_SNP_FWD            (1ULL << 35)
-#define SNB_HITM               (1ULL << 36)
-#define SNB_NON_DRAM           (1ULL << 37)
-
-#define SNB_DMND_READ          (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
-#define SNB_DMND_WRITE         (SNB_DMND_RFO|SNB_LLC_RFO)
-#define SNB_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SNB_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
-                                SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
-                                SNB_HITM)
-
-#define SNB_DRAM_ANY           (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
-#define SNB_DRAM_REMOTE                (SNB_REMOTE|SNB_SNP_ANY)
-
-#define SNB_L3_ACCESS          SNB_RESP_ANY
-#define SNB_L3_MISS            (SNB_DRAM_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 snb_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
-               [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
-       },
- },
-};
-
-static __initconst const u64 snb_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
-               [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
-               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-
-};
-
-/*
- * Notes on the events:
- * - data reads do not include code reads (comparable to earlier tables)
- * - data counts include speculative execution (except L1 write, dtlb, bpu)
- * - remote node access includes remote memory, remote cache, remote mmio.
- * - prefetches are not included in the counts because they are not
- *   reliably counted.
- */
-
-#define HSW_DEMAND_DATA_RD             BIT_ULL(0)
-#define HSW_DEMAND_RFO                 BIT_ULL(1)
-#define HSW_ANY_RESPONSE               BIT_ULL(16)
-#define HSW_SUPPLIER_NONE              BIT_ULL(17)
-#define HSW_L3_MISS_LOCAL_DRAM         BIT_ULL(22)
-#define HSW_L3_MISS_REMOTE_HOP0                BIT_ULL(27)
-#define HSW_L3_MISS_REMOTE_HOP1                BIT_ULL(28)
-#define HSW_L3_MISS_REMOTE_HOP2P       BIT_ULL(29)
-#define HSW_L3_MISS                    (HSW_L3_MISS_LOCAL_DRAM| \
-                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
-                                        HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_SNOOP_NONE                 BIT_ULL(31)
-#define HSW_SNOOP_NOT_NEEDED           BIT_ULL(32)
-#define HSW_SNOOP_MISS                 BIT_ULL(33)
-#define HSW_SNOOP_HIT_NO_FWD           BIT_ULL(34)
-#define HSW_SNOOP_HIT_WITH_FWD         BIT_ULL(35)
-#define HSW_SNOOP_HITM                 BIT_ULL(36)
-#define HSW_SNOOP_NON_DRAM             BIT_ULL(37)
-#define HSW_ANY_SNOOP                  (HSW_SNOOP_NONE| \
-                                        HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
-                                        HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
-                                        HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
-#define HSW_SNOOP_DRAM                 (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
-#define HSW_DEMAND_READ                        HSW_DEMAND_DATA_RD
-#define HSW_DEMAND_WRITE               HSW_DEMAND_RFO
-#define HSW_L3_MISS_REMOTE             (HSW_L3_MISS_REMOTE_HOP0|\
-                                        HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
-#define HSW_LLC_ACCESS                 HSW_ANY_RESPONSE
-
-#define BDW_L3_MISS_LOCAL              BIT(26)
-#define BDW_L3_MISS                    (BDW_L3_MISS_LOCAL| \
-                                        HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
-                                        HSW_L3_MISS_REMOTE_HOP2P)
-
-
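One detail worth noting in the Haswell masks above: HSW_SNOOP_DRAM is derived by clearing the non-DRAM bit from HSW_ANY_SNOOP rather than OR-ing the remaining snoop bits by hand. A standalone sketch of that equivalence (illustrative only, with the bit positions copied from the defines above):

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)      (1ULL << (n))

int main(void)
{
        uint64_t none     = BIT_ULL(31), not_needed = BIT_ULL(32);
        uint64_t miss     = BIT_ULL(33), hit_no_fwd = BIT_ULL(34);
        uint64_t hit_fwd  = BIT_ULL(35), hitm       = BIT_ULL(36);
        uint64_t non_dram = BIT_ULL(37);

        uint64_t any_snoop  = none | not_needed | miss | hit_no_fwd |
                              hit_fwd | hitm | non_dram;
        uint64_t snoop_dram = any_snoop & ~non_dram;    /* HSW_SNOOP_DRAM */
        uint64_t by_hand    = none | not_needed | miss | hit_no_fwd |
                              hit_fwd | hitm;

        printf("equal: %d (0x%llx)\n", snoop_dram == by_hand,
               (unsigned long long)snoop_dram);
        return 0;
}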
-static __initconst const u64 hsw_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
-               [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
-               [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
-
-static __initconst const u64 hsw_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
-                                      HSW_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS|HSW_ANY_SNOOP,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
-                                      HSW_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS|HSW_ANY_SNOOP,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS_LOCAL_DRAM|
-                                      HSW_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
-                                      HSW_L3_MISS_REMOTE|
-                                      HSW_SNOOP_DRAM,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS_LOCAL_DRAM|
-                                      HSW_SNOOP_DRAM,
-               [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
-                                      HSW_L3_MISS_REMOTE|
-                                      HSW_SNOOP_DRAM,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
-};
-
-static __initconst const u64 westmere_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       /*
-        * Use RFO, not WRITEBACK, because a write miss would typically occur
-        * on RFO.
-        */
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-               [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-};
-
-/*
- * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
- * See IA32 SDM Vol 3B 30.6.1.3
- */
-
-#define NHM_DMND_DATA_RD       (1 << 0)
-#define NHM_DMND_RFO           (1 << 1)
-#define NHM_DMND_IFETCH                (1 << 2)
-#define NHM_DMND_WB            (1 << 3)
-#define NHM_PF_DATA_RD         (1 << 4)
-#define NHM_PF_DATA_RFO                (1 << 5)
-#define NHM_PF_IFETCH          (1 << 6)
-#define NHM_OFFCORE_OTHER      (1 << 7)
-#define NHM_UNCORE_HIT         (1 << 8)
-#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
-#define NHM_OTHER_CORE_HITM    (1 << 10)
-                               /* reserved */
-#define NHM_REMOTE_CACHE_FWD   (1 << 12)
-#define NHM_REMOTE_DRAM                (1 << 13)
-#define NHM_LOCAL_DRAM         (1 << 14)
-#define NHM_NON_DRAM           (1 << 15)
-
-#define NHM_LOCAL              (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_REMOTE             (NHM_REMOTE_DRAM)
-
-#define NHM_DMND_READ          (NHM_DMND_DATA_RD)
-#define NHM_DMND_WRITE         (NHM_DMND_RFO|NHM_DMND_WB)
-#define NHM_DMND_PREFETCH      (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
-
-#define NHM_L3_HIT     (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS    (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
-#define NHM_L3_ACCESS  (NHM_L3_HIT|NHM_L3_MISS)
-
-static __initconst const u64 nehalem_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
-               [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
-       },
- },
-};
-
-static __initconst const u64 nehalem_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
-               [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
-               [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
-               [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       /*
-        * Use RFO, not WRITEBACK, because a write miss would typically occur
-        * on RFO.
-        */
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
-               [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
-               [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
-               [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
-};
-
-static __initconst const u64 core2_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
-               [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
-               [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
-               [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-               [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static __initconst const u64 atom_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
-               [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
-               [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
-               [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
-               [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static struct extra_reg intel_slm_extra_regs[] __read_mostly =
-{
-       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
-       EVENT_EXTRA_END
-};
-
-#define SLM_DMND_READ          SNB_DMND_DATA_RD
-#define SLM_DMND_WRITE         SNB_DMND_RFO
-#define SLM_DMND_PREFETCH      (SNB_PF_DATA_RD|SNB_PF_RFO)
-
-#define SLM_SNP_ANY            (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
-#define SLM_LLC_ACCESS         SNB_RESP_ANY
-#define SLM_LLC_MISS           (SLM_SNP_ANY|SNB_NON_DRAM)
-
-static __initconst const u64 slm_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
-               [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
-       },
- },
-};
-
-static __initconst const u64 slm_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
-               [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
-       [ C(OP_PREFETCH) ] = {
-               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
-               [ C(RESULT_ACCESS) ] = 0x01b7,
-               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
-               [ C(RESULT_MISS)   ] = 0x01b7,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
-               [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
-               [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-#define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
-#define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
-#define KNL_MCDRAM_LOCAL       BIT_ULL(21)
-#define KNL_MCDRAM_FAR         BIT_ULL(22)
-#define KNL_DDR_LOCAL          BIT_ULL(23)
-#define KNL_DDR_FAR            BIT_ULL(24)
-#define KNL_DRAM_ANY           (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
-                                   KNL_DDR_LOCAL | KNL_DDR_FAR)
-#define KNL_L2_READ            SLM_DMND_READ
-#define KNL_L2_WRITE           SLM_DMND_WRITE
-#define KNL_L2_PREFETCH                SLM_DMND_PREFETCH
-#define KNL_L2_ACCESS          SLM_LLC_ACCESS
-#define KNL_L2_MISS            (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
-                                  KNL_DRAM_ANY | SNB_SNP_ANY | \
-                                                 SNB_NON_DRAM)
-
-static __initconst const u64 knl_hw_cache_extra_regs
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
-       [C(LL)] = {
-               [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = 0,
-               },
-               [C(OP_WRITE)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
-               },
-               [C(OP_PREFETCH)] = {
-                       [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
-                       [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
-               },
-       },
-};
-
-/*
- * Use from PMIs where the LBRs are already disabled.
- */
-static void __intel_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-               intel_pmu_disable_bts();
-       else
-               intel_bts_disable_local();
-
-       intel_pmu_pebs_disable_all();
-}
-
-static void intel_pmu_disable_all(void)
-{
-       __intel_pmu_disable_all();
-       intel_pmu_lbr_disable_all();
-}
-
-static void __intel_pmu_enable_all(int added, bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       intel_pmu_pebs_enable_all();
-       intel_pmu_lbr_enable_all(pmi);
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
-                       x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-               struct perf_event *event =
-                       cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-
-               if (WARN_ON_ONCE(!event))
-                       return;
-
-               intel_pmu_enable_bts(event->hw.config);
-       } else
-               intel_bts_enable_local();
-}
-
-static void intel_pmu_enable_all(int added)
-{
-       __intel_pmu_enable_all(added, false);
-}
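__intel_pmu_enable_all() above turns the counters back on by writing intel_ctrl with the guest-only counters masked off. A standalone sketch of that bit filtering, using hypothetical mask values (architecturally, GLOBAL_CTRL conventionally carries one enable bit per general-purpose counter in the low bits and one per fixed counter starting at bit 32):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Hypothetical: 4 GP counters plus fixed counter 0 enabled. */
        uint64_t intel_ctrl = 0xfULL | (1ULL << 32);

        /* Counter 2 is guest-only, so the host write leaves it disabled. */
        uint64_t guest_mask = 1ULL << 2;

        uint64_t global_ctrl = intel_ctrl & ~guest_mask;

        printf("MSR_CORE_PERF_GLOBAL_CTRL value = 0x%llx\n",
               (unsigned long long)global_ctrl);
        return 0;
}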
-
-/*
- * Workaround for:
- *   Intel Errata AAK100 (model 26)
- *   Intel Errata AAP53  (model 30)
- *   Intel Errata BD53   (model 44)
- *
- * The official story:
- *   These chips need to be 'reset' when adding counters by programming the
- *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
- *   in sequence on the same PMC or on different PMCs.
- *
- * In practice it appears some of these events do in fact count, and
- * we need to program all 4 events.
- */
-static void intel_pmu_nhm_workaround(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       static const unsigned long nhm_magic[4] = {
-               0x4300B5,
-               0x4300D2,
-               0x4300B1,
-               0x4300B1
-       };
-       struct perf_event *event;
-       int i;
-
-       /*
-        * The erratum requires the following steps:
-        * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
-        * 2) Configure 4 PERFEVTSELx with the magic events and clear
-        *    the corresponding PMCx;
-        * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
-        * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
-        * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
-        */
-
-       /*
-        * The real steps we choose are a little different from above.
-        * A) To reduce MSR operations, we don't run step 1) as they
-        *    are already cleared before this function is called;
-        * B) Call x86_perf_event_update to save PMCx before configuring
-        *    PERFEVTSELx with magic number;
-        * C) With step 5), we only clear a PERFEVTSELx when it is
-        *    not currently in use.
-        * D) Call x86_perf_event_set_period to restore PMCx;
-        */
-
-       /* We always operate on 4 pairs of PERF counters */
-       for (i = 0; i < 4; i++) {
-               event = cpuc->events[i];
-               if (event)
-                       x86_perf_event_update(event);
-       }
-
-       for (i = 0; i < 4; i++) {
-               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
-               wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
-       }
-
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
-
-       for (i = 0; i < 4; i++) {
-               event = cpuc->events[i];
-
-               if (event) {
-                       x86_perf_event_set_period(event);
-                       __x86_pmu_enable_event(&event->hw,
-                                       ARCH_PERFMON_EVENTSEL_ENABLE);
-               } else
-                       wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
-       }
-}
-
-static void intel_pmu_nhm_enable_all(int added)
-{
-       if (added)
-               intel_pmu_nhm_workaround();
-       intel_pmu_enable_all(added);
-}
-
-static inline u64 intel_pmu_get_status(void)
-{
-       u64 status;
-
-       rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
-
-       return status;
-}
-
-static inline void intel_pmu_ack_status(u64 ack)
-{
-       wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
-}
-
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
-{
-       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
-       u64 ctrl_val, mask;
-
-       mask = 0xfULL << (idx * 4);
-
-       rdmsrl(hwc->config_base, ctrl_val);
-       ctrl_val &= ~mask;
-       wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
-       return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
-static void intel_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               intel_pmu_disable_bts();
-               intel_pmu_drain_bts_buffer();
-               return;
-       }
-
-       cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
-       cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
-       /*
-        * LBR must be disabled before any actual event,
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_disable(event);
-
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-               intel_pmu_disable_fixed(hwc);
-               return;
-       }
-
-       x86_pmu_disable_event(event);
-
-       if (unlikely(event->attr.precise_ip))
-               intel_pmu_pebs_disable(event);
-}
-
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
-{
-       int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
-       u64 ctrl_val, bits, mask;
-
-       /*
-        * Enable IRQ generation (0x8),
-        * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
-        * if requested:
-        */
-       bits = 0x8ULL;
-       if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-               bits |= 0x2;
-       if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-               bits |= 0x1;
-
-       /*
-        * ANY bit is supported in v3 and up
-        */
-       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
-               bits |= 0x4;
-
-       bits <<= (idx * 4);
-       mask = 0xfULL << (idx * 4);
-
-       rdmsrl(hwc->config_base, ctrl_val);
-       ctrl_val &= ~mask;
-       ctrl_val |= bits;
-       wrmsrl(hwc->config_base, ctrl_val);
-}
-
-static void intel_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-               if (!__this_cpu_read(cpu_hw_events.enabled))
-                       return;
-
-               intel_pmu_enable_bts(hwc->config);
-               return;
-       }
-       /*
-        * LBR must be enabled before any actual event,
-        * because any event may be combined with LBR
-        */
-       if (needs_branch_stack(event))
-               intel_pmu_lbr_enable(event);
-
-       if (event->attr.exclude_host)
-               cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
-       if (event->attr.exclude_guest)
-               cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
-       if (unlikely(event_is_checkpointed(event)))
-               cpuc->intel_cp_status |= (1ull << hwc->idx);
-
-       if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-               intel_pmu_enable_fixed(hwc);
-               return;
-       }
-
-       if (unlikely(event->attr.precise_ip))
-               intel_pmu_pebs_enable(event);
-
-       __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-}
-
-/*
- * Save and restart an expired event. Called by NMI contexts,
- * so it has to be careful about preempting normal event ops:
- */
-int intel_pmu_save_and_restart(struct perf_event *event)
-{
-       x86_perf_event_update(event);
-       /*
-        * For a checkpointed counter always reset back to 0.  This
-        * avoids a situation where the counter overflows, aborts the
-        * transaction and is then set back to shortly before the
-        * overflow, and overflows and aborts again.
-        */
-       if (unlikely(event_is_checkpointed(event))) {
-               /* No race with NMIs because the counter should not be armed */
-               wrmsrl(event->hw.event_base, 0);
-               local64_set(&event->hw.prev_count, 0);
-       }
-       return x86_perf_event_set_period(event);
-}
-
-static void intel_pmu_reset(void)
-{
-       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
-       unsigned long flags;
-       int idx;
-
-       if (!x86_pmu.num_counters)
-               return;
-
-       local_irq_save(flags);
-
-       pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
-               wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
-       }
-       for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
-               wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
-
-       if (ds)
-               ds->bts_index = ds->bts_buffer_base;
-
-       /* Ack all overflows and disable fixed counters */
-       if (x86_pmu.version >= 2) {
-               intel_pmu_ack_status(intel_pmu_get_status());
-               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-       }
-
-       /* Reset LBRs and LBR freezing */
-       if (x86_pmu.lbr_nr) {
-               update_debugctlmsr(get_debugctlmsr() &
-                       ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
-       }
-
-       local_irq_restore(flags);
-}
-
-/*
- * This handler is triggered by the local APIC, so the APIC IRQ handling
- * rules apply:
- */
-static int intel_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       int bit, loops;
-       u64 status;
-       int handled;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       /*
-        * There is no known reason not to always do the late ACK,
-        * but just in case, make it opt-in.
-        */
-       if (!x86_pmu.late_ack)
-               apic_write(APIC_LVTPC, APIC_DM_NMI);
-       __intel_pmu_disable_all();
-       handled = intel_pmu_drain_bts_buffer();
-       handled += intel_bts_interrupt();
-       status = intel_pmu_get_status();
-       if (!status)
-               goto done;
-
-       loops = 0;
-again:
-       intel_pmu_lbr_read();
-       intel_pmu_ack_status(status);
-       if (++loops > 100) {
-               static bool warned = false;
-               if (!warned) {
-                       WARN(1, "perfevents: irq loop stuck!\n");
-                       perf_event_print_debug();
-                       warned = true;
-               }
-               intel_pmu_reset();
-               goto done;
-       }
-
-       inc_irq_stat(apic_perf_irqs);
-
-       /*
-        * Ignore a range of extra bits in status that do not indicate
-        * overflow by themselves.
-        */
-       status &= ~(GLOBAL_STATUS_COND_CHG |
-                   GLOBAL_STATUS_ASIF |
-                   GLOBAL_STATUS_LBRS_FROZEN);
-       if (!status)
-               goto done;
-
-       /*
-        * PEBS overflow sets bit 62 in the global status register
-        */
-       if (__test_and_clear_bit(62, (unsigned long *)&status)) {
-               handled++;
-               x86_pmu.drain_pebs(regs);
-       }
-
-       /*
-        * Intel PT
-        */
-       if (__test_and_clear_bit(55, (unsigned long *)&status)) {
-               handled++;
-               intel_pt_interrupt();
-       }
-
-       /*
-        * Checkpointed counters can lead to 'spurious' PMIs because the
-        * rollback caused by the PMI will have cleared the overflow status
-        * bit. Therefore always force probe these counters.
-        */
-       status |= cpuc->intel_cp_status;
-
-       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-               struct perf_event *event = cpuc->events[bit];
-
-               handled++;
-
-               if (!test_bit(bit, cpuc->active_mask))
-                       continue;
-
-               if (!intel_pmu_save_and_restart(event))
-                       continue;
-
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (has_branch_stack(event))
-                       data.br_stack = &cpuc->lbr_stack;
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       /*
-        * Repeat if there is more work to be done:
-        */
-       status = intel_pmu_get_status();
-       if (status)
-               goto again;
-
-done:
-       __intel_pmu_enable_all(0, true);
-       /*
-        * Only unmask the NMI after the overflow counters
-        * have been reset. This avoids spurious NMIs on
-        * Haswell CPUs.
-        */
-       if (x86_pmu.late_ack)
-               apic_write(APIC_LVTPC, APIC_DM_NMI);
-       return handled;
-}
-
-static struct event_constraint *
-intel_bts_constraints(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int hw_event, bts_event;
-
-       if (event->attr.freq)
-               return NULL;
-
-       hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
-       bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
-       if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
-               return &bts_constraint;
-
-       return NULL;
-}
-
-static int intel_alt_er(int idx, u64 config)
-{
-       int alt_idx = idx;
-
-       if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
-               return idx;
-
-       if (idx == EXTRA_REG_RSP_0)
-               alt_idx = EXTRA_REG_RSP_1;
-
-       if (idx == EXTRA_REG_RSP_1)
-               alt_idx = EXTRA_REG_RSP_0;
-
-       if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
-               return idx;
-
-       return alt_idx;
-}
-
-static void intel_fixup_er(struct perf_event *event, int idx)
-{
-       event->hw.extra_reg.idx = idx;
-
-       if (idx == EXTRA_REG_RSP_0) {
-               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
-               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
-       } else if (idx == EXTRA_REG_RSP_1) {
-               event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-               event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
-               event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-       }
-}
-
-/*
- * manage allocation of shared extra msr for certain events
- *
- * sharing can be:
- * per-cpu: to be shared between the various events on a single PMU
- * per-core: per-cpu + shared by HT threads
- */
-static struct event_constraint *
-__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-                                  struct perf_event *event,
-                                  struct hw_perf_event_extra *reg)
-{
-       struct event_constraint *c = &emptyconstraint;
-       struct er_account *era;
-       unsigned long flags;
-       int idx = reg->idx;
-
-       /*
-        * reg->alloc can be set due to existing state, so for fake cpuc we
-        * need to ignore this, otherwise we might fail to allocate proper fake
-        * state for this extra reg constraint. Also see the comment below.
-        */
-       if (reg->alloc && !cpuc->is_fake)
-               return NULL; /* call x86_get_event_constraint() */
-
-again:
-       era = &cpuc->shared_regs->regs[idx];
-       /*
-        * we use spin_lock_irqsave() to avoid lockdep issues when
-        * passing a fake cpuc
-        */
-       raw_spin_lock_irqsave(&era->lock, flags);
-
-       if (!atomic_read(&era->ref) || era->config == reg->config) {
-
-               /*
-                * If it's a fake cpuc -- as per validate_{group,event}() --
-                * we shouldn't touch event state, and we can avoid doing so
-                * since both will only call get_event_constraints() once on
-                * each event; this avoids the need for reg->alloc.
-                *
-                * Not doing the ER fixup will only result in era->reg being
-                * wrong, but since we won't actually try and program hardware
-                * this isn't a problem either.
-                */
-               if (!cpuc->is_fake) {
-                       if (idx != reg->idx)
-                               intel_fixup_er(event, idx);
-
-                       /*
-                        * x86_schedule_events() can call get_event_constraints()
-                        * multiple times on events in the case of incremental
-                        * scheduling. reg->alloc ensures we only do the ER
-                        * allocation once.
-                        */
-                       reg->alloc = 1;
-               }
-
-               /* lock in msr value */
-               era->config = reg->config;
-               era->reg = reg->reg;
-
-               /* one more user */
-               atomic_inc(&era->ref);
-
-               /*
-                * need to call x86_get_event_constraint()
-                * to check if associated event has constraints
-                */
-               c = NULL;
-       } else {
-               idx = intel_alt_er(idx, reg->config);
-               if (idx != reg->idx) {
-                       raw_spin_unlock_irqrestore(&era->lock, flags);
-                       goto again;
-               }
-       }
-       raw_spin_unlock_irqrestore(&era->lock, flags);
-
-       return c;
-}
-
-static void
-__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
-                                  struct hw_perf_event_extra *reg)
-{
-       struct er_account *era;
-
-       /*
-        * Only put the constraint if the extra reg was actually allocated. Also
-        * takes care of events which do not use an extra shared reg.
-        *
-        * Also, if this is a fake cpuc we shouldn't touch any event state
-        * (reg->alloc) and we don't care about leaving inconsistent cpuc state
-        * either since it'll be thrown out.
-        */
-       if (!reg->alloc || cpuc->is_fake)
-               return;
-
-       era = &cpuc->shared_regs->regs[reg->idx];
-
-       /* one fewer user */
-       atomic_dec(&era->ref);
-
-       /* allocate again next time */
-       reg->alloc = 0;
-}
-
-static struct event_constraint *
-intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
-                             struct perf_event *event)
-{
-       struct event_constraint *c = NULL, *d;
-       struct hw_perf_event_extra *xreg, *breg;
-
-       xreg = &event->hw.extra_reg;
-       if (xreg->idx != EXTRA_REG_NONE) {
-               c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
-               if (c == &emptyconstraint)
-                       return c;
-       }
-       breg = &event->hw.branch_reg;
-       if (breg->idx != EXTRA_REG_NONE) {
-               d = __intel_shared_reg_get_constraints(cpuc, event, breg);
-               if (d == &emptyconstraint) {
-                       __intel_shared_reg_put_constraints(cpuc, xreg);
-                       c = d;
-               }
-       }
-       return c;
-}
-
-struct event_constraint *
-x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       if (x86_pmu.event_constraints) {
-               for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code) {
-                               event->hw.flags |= c->flags;
-                               return c;
-                       }
-               }
-       }
-
-       return &unconstrained;
-}
-
-static struct event_constraint *
-__intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                           struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       c = intel_bts_constraints(event);
-       if (c)
-               return c;
-
-       c = intel_shared_regs_constraints(cpuc, event);
-       if (c)
-               return c;
-
-       c = intel_pebs_constraints(event);
-       if (c)
-               return c;
-
-       return x86_get_event_constraints(cpuc, idx, event);
-}
-
-static void
-intel_start_scheduling(struct cpu_hw_events *cpuc)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       xl->sched_started = true;
-       /*
-        * lock shared state until we are done scheduling
-        * in stop_event_scheduling()
-        * makes scheduling appear as a transaction
-        */
-       raw_spin_lock(&excl_cntrs->lock);
-}
-
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct event_constraint *c = cpuc->event_constraint[idx];
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       lockdep_assert_held(&excl_cntrs->lock);
-
-       if (c->flags & PERF_X86_EVENT_EXCL)
-               xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
-       else
-               xl->state[cntr] = INTEL_EXCL_SHARED;
-}
-
-static void
-intel_stop_scheduling(struct cpu_hw_events *cpuc)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xl;
-       int tid = cpuc->excl_thread_id;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return;
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       xl = &excl_cntrs->states[tid];
-
-       xl->sched_started = false;
-       /*
-        * release shared state lock (acquired in intel_start_scheduling())
-        */
-       raw_spin_unlock(&excl_cntrs->lock);
-}
-
-static struct event_constraint *
-intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
-                          int idx, struct event_constraint *c)
-{
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       struct intel_excl_states *xlo;
-       int tid = cpuc->excl_thread_id;
-       int is_excl, i;
-
-       /*
-        * validating a group does not require
-        * enforcing cross-thread exclusion
-        */
-       if (cpuc->is_fake || !is_ht_workaround_enabled())
-               return c;
-
-       /*
-        * no exclusion needed
-        */
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return c;
-
-       /*
-        * because we modify the constraint, we need
-        * to make a copy. Static constraints come
-        * from static const tables.
-        *
-        * only needed when constraint has not yet
-        * been cloned (marked dynamic)
-        */
-       if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
-               struct event_constraint *cx;
-
-               /*
-                * grab pre-allocated constraint entry
-                */
-               cx = &cpuc->constraint_list[idx];
-
-               /*
-                * initialize dynamic constraint
-                * with static constraint
-                */
-               *cx = *c;
-
-               /*
-                * mark constraint as dynamic, so we
-                * can free it later on
-                */
-               cx->flags |= PERF_X86_EVENT_DYNAMIC;
-               c = cx;
-       }
-
-       /*
-        * From here on, the constraint is dynamic.
-        * Either it was just allocated above, or it
-        * was allocated during an earlier invocation
-        * of this function
-        */
-
-       /*
-        * state of sibling HT
-        */
-       xlo = &excl_cntrs->states[tid ^ 1];
-
-       /*
-        * event requires exclusive counter access
-        * across HT threads
-        */
-       is_excl = c->flags & PERF_X86_EVENT_EXCL;
-       if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
-               event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
-               if (!cpuc->n_excl++)
-                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
-       }
-
-       /*
-        * Modify static constraint with current dynamic
-        * state of thread
-        *
-        * EXCLUSIVE: sibling counter measuring exclusive event
-        * SHARED   : sibling counter measuring non-exclusive event
-        * UNUSED   : sibling counter unused
-        */
-       for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
-               /*
-                * exclusive event in sibling counter
-                * our corresponding counter cannot be used
-                * regardless of our event
-                */
-               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
-                       __clear_bit(i, c->idxmsk);
-               /*
-                * if measuring an exclusive event, sibling
-                * measuring non-exclusive, then counter cannot
-                * be used
-                */
-               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
-                       __clear_bit(i, c->idxmsk);
-       }
-
-       /*
-        * recompute actual bit weight for scheduling algorithm
-        */
-       c->weight = hweight64(c->idxmsk64);
-
-       /*
-        * if we return an empty mask, then switch
-        * back to static empty constraint to avoid
-        * the cost of freeing later on
-        */
-       if (c->weight == 0)
-               c = &emptyconstraint;
-
-       return c;
-}
-
-static struct event_constraint *
-intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                           struct perf_event *event)
-{
-       struct event_constraint *c1 = NULL;
-       struct event_constraint *c2;
-
-       if (idx >= 0) /* fake does < 0 */
-               c1 = cpuc->event_constraint[idx];
-
-       /*
-        * first time only
-        * - static constraint: no change across incremental scheduling calls
-        * - dynamic constraint: handled by intel_get_excl_constraints()
-        */
-       c2 = __intel_get_event_constraints(cpuc, idx, event);
-       if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
-               bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
-               c1->weight = c2->weight;
-               c2 = c1;
-       }
-
-       if (cpuc->excl_cntrs)
-               return intel_get_excl_constraints(cpuc, event, idx, c2);
-
-       return c2;
-}
-
-static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
-               struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-       int tid = cpuc->excl_thread_id;
-       struct intel_excl_states *xl;
-
-       /*
-        * nothing needed if in group validation mode
-        */
-       if (cpuc->is_fake)
-               return;
-
-       if (WARN_ON_ONCE(!excl_cntrs))
-               return;
-
-       if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
-               hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
-               if (!--cpuc->n_excl)
-                       WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
-       }
-
-       /*
-        * If the event was actually assigned, then mark the counter state as
-        * unused now.
-        */
-       if (hwc->idx >= 0) {
-               xl = &excl_cntrs->states[tid];
-
-               /*
-                * put_constraint may be called from x86_schedule_events(),
-                * which already has the lock held, so make the locking
-                * conditional here.
-                */
-               if (!xl->sched_started)
-                       raw_spin_lock(&excl_cntrs->lock);
-
-               xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
-
-               if (!xl->sched_started)
-                       raw_spin_unlock(&excl_cntrs->lock);
-       }
-}
-
-static void
-intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
-                                       struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-
-       reg = &event->hw.extra_reg;
-       if (reg->idx != EXTRA_REG_NONE)
-               __intel_shared_reg_put_constraints(cpuc, reg);
-
-       reg = &event->hw.branch_reg;
-       if (reg->idx != EXTRA_REG_NONE)
-               __intel_shared_reg_put_constraints(cpuc, reg);
-}
-
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
-                                       struct perf_event *event)
-{
-       intel_put_shared_regs_event_constraints(cpuc, event);
-
-       /*
-        * If the PMU has exclusive counter restrictions, then
-        * all events are subject to them and must call the
-        * put_excl_constraints() routine
-        */
-       if (cpuc->excl_cntrs)
-               intel_put_excl_constraints(cpuc, event);
-}
-
-static void intel_pebs_aliases_core2(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use INST_RETIRED.ANY_P
-                * (0x00c0), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * INST_RETIRED.ANY_P counts the number of cycles that retire
-                * CNTMASK instructions. By setting CNTMASK to a value (16)
-                * larger than the maximum number of instructions that can be
-                * retired per cycle (4) and then inverting the condition, we
-                * count all cycles that retire 16 or fewer instructions, which
-                * is every cycle.
-                *
-                * Thereby we gain a PEBS capable cycle counter.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
-
-static void intel_pebs_aliases_snb(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use UOPS_RETIRED.ALL
-                * (0x01c2), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * UOPS_RETIRED.ALL counts the number of cycles that retire
-                * CNTMASK micro-ops. By setting CNTMASK to a value (16)
-                * larger than the maximum number of micro-ops that can be
-                * retired per cycle (4) and then inverting the condition, we
-                * count all cycles that retire 16 or fewer micro-ops, which
-                * is every cycle.
-                *
-                * Thereby we gain a PEBS capable cycle counter.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
-
-static void intel_pebs_aliases_precdist(struct perf_event *event)
-{
-       if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
-               /*
-                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
-                * (0x003c) so that we can use it with PEBS.
-                *
-                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
-                * PEBS capable. However we can use INST_RETIRED.PREC_DIST
-                * (0x01c0), which is a PEBS capable event, to get the same
-                * count.
-                *
-                * The PREC_DIST event has special support to minimize sample
-                * shadowing effects. One drawback is that it can only be
-                * programmed on counter 1, but that seems like an
-                * acceptable trade-off.
-                */
-               u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
-
-               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
-               event->hw.config = alt_config;
-       }
-}
-
-static void intel_pebs_aliases_ivb(struct perf_event *event)
-{
-       if (event->attr.precise_ip < 3)
-               return intel_pebs_aliases_snb(event);
-       return intel_pebs_aliases_precdist(event);
-}
-
-static void intel_pebs_aliases_skl(struct perf_event *event)
-{
-       if (event->attr.precise_ip < 3)
-               return intel_pebs_aliases_core2(event);
-       return intel_pebs_aliases_precdist(event);
-}
-
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
-{
-       unsigned long flags = x86_pmu.free_running_flags;
-
-       if (event->attr.use_clockid)
-               flags &= ~PERF_SAMPLE_TIME;
-       return flags;
-}
-
-static int intel_pmu_hw_config(struct perf_event *event)
-{
-       int ret = x86_pmu_hw_config(event);
-
-       if (ret)
-               return ret;
-
-       if (event->attr.precise_ip) {
-               if (!event->attr.freq) {
-                       event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
-                       if (!(event->attr.sample_type &
-                             ~intel_pmu_free_running_flags(event)))
-                               event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
-               }
-               if (x86_pmu.pebs_aliases)
-                       x86_pmu.pebs_aliases(event);
-       }
-
-       if (needs_branch_stack(event)) {
-               ret = intel_pmu_setup_lbr_filter(event);
-               if (ret)
-                       return ret;
-
-               /*
-                * BTS is set up earlier in this path, so don't account twice
-                */
-               if (!intel_pmu_has_bts(event)) {
-                       /* disallow lbr if conflicting events are present */
-                       if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-                               return -EBUSY;
-
-                       event->destroy = hw_perf_lbr_event_destroy;
-               }
-       }
-
-       if (event->attr.type != PERF_TYPE_RAW)
-               return 0;
-
-       if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
-               return 0;
-
-       if (x86_pmu.version < 3)
-               return -EINVAL;
-
-       if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
-
-       return 0;
-}
-
-struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
-{
-       if (x86_pmu.guest_get_msrs)
-               return x86_pmu.guest_get_msrs(nr);
-       *nr = 0;
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
-
-static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-
-       arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
-       arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
-       arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-       /*
-        * If a PMU counter has PEBS enabled, it is not enough to disable the
-        * counter on guest entry, since a PEBS memory write can overshoot guest
-        * entry and corrupt guest memory. Disabling PEBS solves the problem.
-        */
-       arr[1].msr = MSR_IA32_PEBS_ENABLE;
-       arr[1].host = cpuc->pebs_enabled;
-       arr[1].guest = 0;
-
-       *nr = 2;
-       return arr;
-}
-
-static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
-               struct perf_event *event = cpuc->events[idx];
-
-               arr[idx].msr = x86_pmu_config_addr(idx);
-               arr[idx].host = arr[idx].guest = 0;
-
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-
-               arr[idx].host = arr[idx].guest =
-                       event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
-
-               if (event->attr.exclude_host)
-                       arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-               else if (event->attr.exclude_guest)
-                       arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-       }
-
-       *nr = x86_pmu.num_counters;
-       return arr;
-}
-
-static void core_pmu_enable_event(struct perf_event *event)
-{
-       if (!event->attr.exclude_host)
-               x86_pmu_enable_event(event);
-}
-
-static void core_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
-
-               if (!test_bit(idx, cpuc->active_mask) ||
-                               cpuc->events[idx]->attr.exclude_host)
-                       continue;
-
-               __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
-       }
-}
-
-static int hsw_hw_config(struct perf_event *event)
-{
-       int ret = intel_pmu_hw_config(event);
-
-       if (ret)
-               return ret;
-       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
-               return 0;
-       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
-
-       /*
-        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
-        * PEBS or in ANY thread mode. Since the results are nonsensical, forbid
-        * this combination.
-        */
-       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
-            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
-             event->attr.precise_ip > 0))
-               return -EOPNOTSUPP;
-
-       if (event_is_checkpointed(event)) {
-               /*
-                * Sampling of checkpointed events can cause situations where
-                * the CPU constantly aborts because of an overflow, which is
-                * then checkpointed back and ignored. Forbid checkpointing
-                * for sampling.
-                *
-                * But still allow a long sampling period, so that perf stat
-                * from KVM works.
-                */
-               if (event->attr.sample_period > 0 &&
-                   event->attr.sample_period < 0x7fffffff)
-                       return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-static struct event_constraint counter2_constraint =
-                       EVENT_CONSTRAINT(0, 0x4, 0);
-
-static struct event_constraint *
-hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
-                         struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       c = intel_get_event_constraints(cpuc, idx, event);
-
-       /* Handle special quirk on in_tx_checkpointed only in counter 2 */
-       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
-               if (c->idxmsk64 & (1U << 2))
-                       return &counter2_constraint;
-               return &emptyconstraint;
-       }
-
-       return c;
-}
-
-/*
- * Broadwell:
- *
- * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
- * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
- * the two to enforce a minimum period of 128 (the smallest value that has bits
- * 0-5 cleared and >= 100).
- *
- * Because of how the code in x86_perf_event_set_period() works, the truncation
- * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
- * to make up for the 'lost' events due to carrying the 'error' in period_left.
- *
- * Therefore the effective (average) period matches the requested period,
- * despite coarser hardware granularity.
- */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
-{
-       if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
-                       X86_CONFIG(.event=0xc0, .umask=0x01)) {
-               if (left < 128)
-                       left = 128;
-               left &= ~0x3fu;
-       }
-       return left;
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(pc,    "config:19"     );
-PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-PMU_FORMAT_ATTR(in_tx,  "config:32");
-PMU_FORMAT_ATTR(in_tx_cp, "config:33");
-
-static struct attribute *intel_arch_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-ssize_t intel_event_sysfs_show(char *page, u64 config)
-{
-       u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
-
-       return x86_event_sysfs_show(page, config, event);
-}
-
-struct intel_shared_regs *allocate_shared_regs(int cpu)
-{
-       struct intel_shared_regs *regs;
-       int i;
-
-       regs = kzalloc_node(sizeof(struct intel_shared_regs),
-                           GFP_KERNEL, cpu_to_node(cpu));
-       if (regs) {
-               /*
-                * initialize the locks to keep lockdep happy
-                */
-               for (i = 0; i < EXTRA_REG_MAX; i++)
-                       raw_spin_lock_init(&regs->regs[i].lock);
-
-               regs->core_id = -1;
-       }
-       return regs;
-}
-
-static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
-{
-       struct intel_excl_cntrs *c;
-
-       c = kzalloc_node(sizeof(struct intel_excl_cntrs),
-                        GFP_KERNEL, cpu_to_node(cpu));
-       if (c) {
-               raw_spin_lock_init(&c->lock);
-               c->core_id = -1;
-       }
-       return c;
-}
-
-static int intel_pmu_cpu_prepare(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-
-       if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
-               cpuc->shared_regs = allocate_shared_regs(cpu);
-               if (!cpuc->shared_regs)
-                       goto err;
-       }
-
-       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-               size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
-
-               cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
-               if (!cpuc->constraint_list)
-                       goto err_shared_regs;
-
-               cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-               if (!cpuc->excl_cntrs)
-                       goto err_constraint_list;
-
-               cpuc->excl_thread_id = 0;
-       }
-
-       return NOTIFY_OK;
-
-err_constraint_list:
-       kfree(cpuc->constraint_list);
-       cpuc->constraint_list = NULL;
-
-err_shared_regs:
-       kfree(cpuc->shared_regs);
-       cpuc->shared_regs = NULL;
-
-err:
-       return NOTIFY_BAD;
-}
-
-static void intel_pmu_cpu_starting(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int core_id = topology_core_id(cpu);
-       int i;
-
-       init_debug_store_on_cpu(cpu);
-       /*
-        * Deal with CPUs that don't clear their LBRs on power-up.
-        */
-       intel_pmu_lbr_reset();
-
-       cpuc->lbr_sel = NULL;
-
-       if (!cpuc->shared_regs)
-               return;
-
-       if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
-               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
-                       struct intel_shared_regs *pc;
-
-                       pc = per_cpu(cpu_hw_events, i).shared_regs;
-                       if (pc && pc->core_id == core_id) {
-                               cpuc->kfree_on_online[0] = cpuc->shared_regs;
-                               cpuc->shared_regs = pc;
-                               break;
-                       }
-               }
-               cpuc->shared_regs->core_id = core_id;
-               cpuc->shared_regs->refcnt++;
-       }
-
-       if (x86_pmu.lbr_sel_map)
-               cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
-
-       if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
-               for_each_cpu(i, topology_sibling_cpumask(cpu)) {
-                       struct intel_excl_cntrs *c;
-
-                       c = per_cpu(cpu_hw_events, i).excl_cntrs;
-                       if (c && c->core_id == core_id) {
-                               cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
-                               cpuc->excl_cntrs = c;
-                               cpuc->excl_thread_id = 1;
-                               break;
-                       }
-               }
-               cpuc->excl_cntrs->core_id = core_id;
-               cpuc->excl_cntrs->refcnt++;
-       }
-}
-
-static void free_excl_cntrs(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_excl_cntrs *c;
-
-       c = cpuc->excl_cntrs;
-       if (c) {
-               if (c->core_id == -1 || --c->refcnt == 0)
-                       kfree(c);
-               cpuc->excl_cntrs = NULL;
-               kfree(cpuc->constraint_list);
-               cpuc->constraint_list = NULL;
-       }
-}
-
-static void intel_pmu_cpu_dying(int cpu)
-{
-       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_shared_regs *pc;
-
-       pc = cpuc->shared_regs;
-       if (pc) {
-               if (pc->core_id == -1 || --pc->refcnt == 0)
-                       kfree(pc);
-               cpuc->shared_regs = NULL;
-       }
-
-       free_excl_cntrs(cpu);
-
-       fini_debug_store_on_cpu(cpu);
-}
-
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
-                                bool sched_in)
-{
-       if (x86_pmu.pebs_active)
-               intel_pmu_pebs_sched_task(ctx, sched_in);
-       if (x86_pmu.lbr_nr)
-               intel_pmu_lbr_sched_task(ctx, sched_in);
-}
-
-PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
-
-PMU_FORMAT_ATTR(ldlat, "config1:0-15");
-
-PMU_FORMAT_ATTR(frontend, "config1:0-23");
-
-static struct attribute *intel_arch3_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_any.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       &format_attr_in_tx.attr,
-       &format_attr_in_tx_cp.attr,
-
-       &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
-       &format_attr_ldlat.attr, /* PEBS load latency */
-       NULL,
-};
-
-static struct attribute *skl_format_attr[] = {
-       &format_attr_frontend.attr,
-       NULL,
-};
-
-static __initconst const struct x86_pmu core_pmu = {
-       .name                   = "core",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = x86_pmu_disable_all,
-       .enable_all             = core_pmu_enable_all,
-       .enable                 = core_pmu_enable_event,
-       .disable                = x86_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
-
-       /*
-        * Intel PMCs cannot be accessed sanely above 32-bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL<<31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .event_constraints      = intel_core_event_constraints,
-       .guest_get_msrs         = core_guest_get_msrs,
-       .format_attrs           = intel_arch_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-       /*
-        * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
-        * together with PMU version 1 and thus use core_pmu with
-        * shared_regs. We need the following callbacks here to allocate
-        * it properly.
-        */
-       .cpu_prepare            = intel_pmu_cpu_prepare,
-       .cpu_starting           = intel_pmu_cpu_starting,
-       .cpu_dying              = intel_pmu_cpu_dying,
-};
-
-static __initconst const struct x86_pmu intel_pmu = {
-       .name                   = "Intel",
-       .handle_irq             = intel_pmu_handle_irq,
-       .disable_all            = intel_pmu_disable_all,
-       .enable_all             = intel_pmu_enable_all,
-       .enable                 = intel_pmu_enable_event,
-       .disable                = intel_pmu_disable_event,
-       .hw_config              = intel_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
-       .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
-       .event_map              = intel_pmu_event_map,
-       .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
-       .apic                   = 1,
-       .free_running_flags     = PEBS_FREERUNNING_FLAGS,
-       /*
-        * Intel PMCs cannot be accessed sanely above 32 bit width,
-        * so we install an artificial 1<<31 period regardless of
-        * the generic event period:
-        */
-       .max_period             = (1ULL << 31) - 1,
-       .get_event_constraints  = intel_get_event_constraints,
-       .put_event_constraints  = intel_put_event_constraints,
-       .pebs_aliases           = intel_pebs_aliases_core2,
-
-       .format_attrs           = intel_arch3_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-       .cpu_prepare            = intel_pmu_cpu_prepare,
-       .cpu_starting           = intel_pmu_cpu_starting,
-       .cpu_dying              = intel_pmu_cpu_dying,
-       .guest_get_msrs         = intel_guest_get_msrs,
-       .sched_task             = intel_pmu_sched_task,
-};
-
-static __init void intel_clovertown_quirk(void)
-{
-       /*
-        * PEBS is unreliable due to:
-        *
-        *   AJ67  - PEBS may experience CPL leaks
-        *   AJ68  - PEBS PMI may be delayed by one event
-        *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
-        *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
-        *
-        * AJ67 could be worked around by restricting the OS/USR flags.
-        * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
-        *
-        * AJ106 could possibly be worked around by not allowing LBR
-        *       usage from PEBS, including the fixup.
-        * AJ68  could possibly be worked around by always programming
-        *       a pebs_event_reset[0] value and coping with the lost events.
-        *
-        * But taken together it might just make sense to not enable PEBS on
-        * these chips.
-        */
-       pr_warn("PEBS disabled due to CPU errata\n");
-       x86_pmu.pebs = 0;
-       x86_pmu.pebs_constraints = NULL;
-}
-
-static int intel_snb_pebs_broken(int cpu)
-{
-       u32 rev = UINT_MAX; /* default to broken for unknown models */
-
-       switch (cpu_data(cpu).x86_model) {
-       case 42: /* SNB */
-               rev = 0x28;
-               break;
-
-       case 45: /* SNB-EP */
-               switch (cpu_data(cpu).x86_mask) {
-               case 6: rev = 0x618; break;
-               case 7: rev = 0x70c; break;
-               }
-       }
-
-       return (cpu_data(cpu).microcode < rev);
-}
-
-static void intel_snb_check_microcode(void)
-{
-       int pebs_broken = 0;
-       int cpu;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               if ((pebs_broken = intel_snb_pebs_broken(cpu)))
-                       break;
-       }
-       put_online_cpus();
-
-       if (pebs_broken == x86_pmu.pebs_broken)
-               return;
-
-       /*
-        * Serialized by the microcode lock.
-        */
-       if (x86_pmu.pebs_broken) {
-               pr_info("PEBS enabled due to microcode update\n");
-               x86_pmu.pebs_broken = 0;
-       } else {
-               pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
-               x86_pmu.pebs_broken = 1;
-       }
-}
-
-/*
- * Under certain circumstances, accessing certain MSRs may cause a #GP.
- * This function tests whether the input MSR can be safely accessed.
- */
-static bool check_msr(unsigned long msr, u64 mask)
-{
-       u64 val_old, val_new, val_tmp;
-
-       /*
-        * Read the current value, change it and read it back to see if it
-        * matches, this is needed to detect certain hardware emulators
-        * (qemu/kvm) that don't trap on the MSR access and always return 0s.
-        */
-       if (rdmsrl_safe(msr, &val_old))
-               return false;
-
-       /*
-        * Only change the bits which can be updated by wrmsrl.
-        */
-       val_tmp = val_old ^ mask;
-       if (wrmsrl_safe(msr, val_tmp) ||
-           rdmsrl_safe(msr, &val_new))
-               return false;
-
-       if (val_new != val_tmp)
-               return false;
-
-       /* At this point it is certain that the MSR can be safely accessed.
-        * Restore the old value and return.
-        */
-       wrmsrl(msr, val_old);
-
-       return true;
-}
-
-static __init void intel_sandybridge_quirk(void)
-{
-       x86_pmu.check_microcode = intel_snb_check_microcode;
-       intel_snb_check_microcode();
-}
-
-static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
-       { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
-       { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
-       { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
-       { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
-       { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
-       { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
-       { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
-};
-
-static __init void intel_arch_events_quirk(void)
-{
-       int bit;
-
-       /* disable events that are reported as not present by cpuid */
-       for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
-               intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
-               pr_warn("CPUID marked event: \'%s\' unavailable\n",
-                       intel_arch_events_map[bit].name);
-       }
-}
-
-static __init void intel_nehalem_quirk(void)
-{
-       union cpuid10_ebx ebx;
-
-       ebx.full = x86_pmu.events_maskl;
-       if (ebx.split.no_branch_misses_retired) {
-               /*
-                * Erratum AAJ80 detected, we work it around by using
-                * the BR_MISP_EXEC.ANY event. This will over-count
-                * branch-misses, but it's still much better than the
-                * architectural event which is often completely bogus:
-                */
-               intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
-               ebx.split.no_branch_misses_retired = 0;
-               x86_pmu.events_maskl = ebx.full;
-               pr_info("CPU erratum AAJ80 worked around\n");
-       }
-}
-
-/*
- * enable software workaround for errata:
- * SNB: BJ122
- * IVB: BV98
- * HSW: HSD29
- *
- * Only needed when HT is enabled. However, detecting whether HT is
- * enabled is difficult (model specific). So instead, we enable the
- * workaround during early boot and verify whether it is needed in a
- * later initcall phase, once we have valid topology information to
- * check if HT is actually enabled.
- */
-static __init void intel_ht_bug(void)
-{
-       x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
-
-       x86_pmu.start_scheduling = intel_start_scheduling;
-       x86_pmu.commit_scheduling = intel_commit_scheduling;
-       x86_pmu.stop_scheduling = intel_stop_scheduling;
-}
-
-EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
-
-/* Haswell special events */
-EVENT_ATTR_STR(tx-start,       tx_start,       "event=0xc9,umask=0x1");
-EVENT_ATTR_STR(tx-commit,      tx_commit,      "event=0xc9,umask=0x2");
-EVENT_ATTR_STR(tx-abort,       tx_abort,       "event=0xc9,umask=0x4");
-EVENT_ATTR_STR(tx-capacity,    tx_capacity,    "event=0x54,umask=0x2");
-EVENT_ATTR_STR(tx-conflict,    tx_conflict,    "event=0x54,umask=0x1");
-EVENT_ATTR_STR(el-start,       el_start,       "event=0xc8,umask=0x1");
-EVENT_ATTR_STR(el-commit,      el_commit,      "event=0xc8,umask=0x2");
-EVENT_ATTR_STR(el-abort,       el_abort,       "event=0xc8,umask=0x4");
-EVENT_ATTR_STR(el-capacity,    el_capacity,    "event=0x54,umask=0x2");
-EVENT_ATTR_STR(el-conflict,    el_conflict,    "event=0x54,umask=0x1");
-EVENT_ATTR_STR(cycles-t,       cycles_t,       "event=0x3c,in_tx=1");
-EVENT_ATTR_STR(cycles-ct,      cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
-
-static struct attribute *hsw_events_attrs[] = {
-       EVENT_PTR(tx_start),
-       EVENT_PTR(tx_commit),
-       EVENT_PTR(tx_abort),
-       EVENT_PTR(tx_capacity),
-       EVENT_PTR(tx_conflict),
-       EVENT_PTR(el_start),
-       EVENT_PTR(el_commit),
-       EVENT_PTR(el_abort),
-       EVENT_PTR(el_capacity),
-       EVENT_PTR(el_conflict),
-       EVENT_PTR(cycles_t),
-       EVENT_PTR(cycles_ct),
-       EVENT_PTR(mem_ld_hsw),
-       EVENT_PTR(mem_st_hsw),
-       NULL
-};
-
-__init int intel_pmu_init(void)
-{
-       union cpuid10_edx edx;
-       union cpuid10_eax eax;
-       union cpuid10_ebx ebx;
-       struct event_constraint *c;
-       unsigned int unused;
-       struct extra_reg *er;
-       int version, i;
-
-       if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-               switch (boot_cpu_data.x86) {
-               case 0x6:
-                       return p6_pmu_init();
-               case 0xb:
-                       return knc_pmu_init();
-               case 0xf:
-                       return p4_pmu_init();
-               }
-               return -ENODEV;
-       }
-
-       /*
-        * Check whether the Architectural PerfMon supports
-        * Branch Misses Retired hw_event or not.
-        */
-       cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
-       if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
-               return -ENODEV;
-
-       version = eax.split.version_id;
-       if (version < 2)
-               x86_pmu = core_pmu;
-       else
-               x86_pmu = intel_pmu;
-
-       x86_pmu.version                 = version;
-       x86_pmu.num_counters            = eax.split.num_counters;
-       x86_pmu.cntval_bits             = eax.split.bit_width;
-       x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
-
-       x86_pmu.events_maskl            = ebx.full;
-       x86_pmu.events_mask_len         = eax.split.mask_length;
-
-       x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
-
-       /*
-        * Quirk: v2 perfmon does not report fixed-purpose events, so
-        * assume at least 3 events:
-        */
-       if (version > 1)
-               x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
-
-       if (boot_cpu_has(X86_FEATURE_PDCM)) {
-               u64 capabilities;
-
-               rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-               x86_pmu.intel_cap.capabilities = capabilities;
-       }
-
-       intel_ds_init();
-
-       x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
-
-       /*
-        * Install the hw-cache-events table:
-        */
-       switch (boot_cpu_data.x86_model) {
-       case 14: /* 65nm Core "Yonah" */
-               pr_cont("Core events, ");
-               break;
-
-       case 15: /* 65nm Core2 "Merom"          */
-               x86_add_quirk(intel_clovertown_quirk);
-       case 22: /* 65nm Core2 "Merom-L"        */
-       case 23: /* 45nm Core2 "Penryn"         */
-       case 29: /* 45nm Core2 "Dunnington" (MP) */
-               memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-
-               intel_pmu_lbr_init_core();
-
-               x86_pmu.event_constraints = intel_core2_event_constraints;
-               x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
-               pr_cont("Core2 events, ");
-               break;
-
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-               memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_nhm();
-
-               x86_pmu.event_constraints = intel_nehalem_event_constraints;
-               x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
-               x86_pmu.extra_regs = intel_nehalem_extra_regs;
-
-               x86_pmu.cpu_events = nhm_events_attrs;
-
-               /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
-               x86_add_quirk(intel_nehalem_quirk);
-
-               pr_cont("Nehalem events, ");
-               break;
-
-       case 28: /* 45nm Atom "Pineview"   */
-       case 38: /* 45nm Atom "Lincroft"   */
-       case 39: /* 32nm Atom "Penwell"    */
-       case 53: /* 32nm Atom "Cloverview" */
-       case 54: /* 32nm Atom "Cedarview"  */
-               memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-
-               intel_pmu_lbr_init_atom();
-
-               x86_pmu.event_constraints = intel_gen_event_constraints;
-               x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
-               pr_cont("Atom events, ");
-               break;
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-               memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
-                       sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_atom();
-
-               x86_pmu.event_constraints = intel_slm_event_constraints;
-               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_slm_extra_regs;
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               pr_cont("Silvermont events, ");
-               break;
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_nhm();
-
-               x86_pmu.event_constraints = intel_westmere_event_constraints;
-               x86_pmu.enable_all = intel_pmu_nhm_enable_all;
-               x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_westmere_extra_regs;
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-
-               x86_pmu.cpu_events = nhm_events_attrs;
-
-               /* UOPS_ISSUED.STALLED_CYCLES */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
-
-               pr_cont("Westmere events, ");
-               break;
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-               x86_add_quirk(intel_sandybridge_quirk);
-               x86_add_quirk(intel_ht_bug);
-               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_snb();
-
-               x86_pmu.event_constraints = intel_snb_event_constraints;
-               x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-               if (boot_cpu_data.x86_model == 45)
-                       x86_pmu.extra_regs = intel_snbep_extra_regs;
-               else
-                       x86_pmu.extra_regs = intel_snb_extra_regs;
-
-
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.cpu_events = snb_events_attrs;
-
-               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-               /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
-                       X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
-
-               pr_cont("SandyBridge events, ");
-               break;
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-               x86_add_quirk(intel_ht_bug);
-               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
-                      sizeof(hw_cache_event_ids));
-               /* dTLB-load-misses on IVB is different than SNB */
-               hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
-
-               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
-                      sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_snb();
-
-               x86_pmu.event_constraints = intel_ivb_event_constraints;
-               x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               if (boot_cpu_data.x86_model == 62)
-                       x86_pmu.extra_regs = intel_snbep_extra_regs;
-               else
-                       x86_pmu.extra_regs = intel_snb_extra_regs;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.cpu_events = snb_events_attrs;
-
-               /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
-               intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
-                       X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
-
-               pr_cont("IvyBridge events, ");
-               break;
-
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-               x86_add_quirk(intel_ht_bug);
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-               intel_pmu_lbr_init_hsw();
-
-               x86_pmu.event_constraints = intel_hsw_event_constraints;
-               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_snbep_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.cpu_events = hsw_events_attrs;
-               x86_pmu.lbr_double_abort = true;
-               pr_cont("Haswell events, ");
-               break;
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-               /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
-               hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
-                                                                        BDW_L3_MISS|HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
-                                                                         HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
-                                                                            BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
-               hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
-                                                                             BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
-
-               intel_pmu_lbr_init_hsw();
-
-               x86_pmu.event_constraints = intel_bdw_event_constraints;
-               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_snbep_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.cpu_events = hsw_events_attrs;
-               x86_pmu.limit_period = bdw_limit_period;
-               pr_cont("Broadwell events, ");
-               break;
-
-       case 87: /* Knights Landing Xeon Phi */
-               memcpy(hw_cache_event_ids,
-                      slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs,
-                      knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-               intel_pmu_lbr_init_knl();
-
-               x86_pmu.event_constraints = intel_slm_event_constraints;
-               x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_knl_extra_regs;
-
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               pr_cont("Knights Landing events, ");
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               x86_pmu.late_ack = true;
-               memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-               memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-               intel_pmu_lbr_init_skl();
-
-               x86_pmu.event_constraints = intel_skl_event_constraints;
-               x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
-               x86_pmu.extra_regs = intel_skl_extra_regs;
-               x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
-               x86_pmu.pebs_prec_dist = true;
-               /* all extra regs are per-cpu when HT is on */
-               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
-               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-
-               x86_pmu.hw_config = hsw_hw_config;
-               x86_pmu.get_event_constraints = hsw_get_event_constraints;
-               x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
-                                                 skl_format_attr);
-               WARN_ON(!x86_pmu.format_attrs);
-               x86_pmu.cpu_events = hsw_events_attrs;
-               pr_cont("Skylake events, ");
-               break;
-
-       default:
-               switch (x86_pmu.version) {
-               case 1:
-                       x86_pmu.event_constraints = intel_v1_event_constraints;
-                       pr_cont("generic architected perfmon v1, ");
-                       break;
-               default:
-                       /*
-                        * default constraints for v2 and up
-                        */
-                       x86_pmu.event_constraints = intel_gen_event_constraints;
-                       pr_cont("generic architected perfmon, ");
-                       break;
-               }
-       }
-
-       if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
-               WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-                    x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
-               x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
-       }
-       x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
-       if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
-               WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-                    x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
-               x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
-       }
-
-       x86_pmu.intel_ctrl |=
-               ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
-
-       if (x86_pmu.event_constraints) {
-               /*
-                * event on fixed counter2 (REF_CYCLES) only works on this
-                * counter, so do not extend mask to generic counters
-                */
-               for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask == FIXED_EVENT_FLAGS
-                           && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
-                               c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
-                       }
-                       c->idxmsk64 &=
-                               ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
-                       c->weight = hweight64(c->idxmsk64);
-               }
-       }
-
-       /*
-        * Accessing an LBR MSR may cause a #GP under certain circumstances,
-        * e.g. KVM doesn't support the LBR MSRs.
-        * Check all LBR MSRs here.
-        * Disable LBR access if any LBR MSR cannot be accessed.
-        */
-       if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
-               x86_pmu.lbr_nr = 0;
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-                     check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
-                       x86_pmu.lbr_nr = 0;
-       }
-
-       /*
-        * Accessing an extra MSR may cause a #GP under certain circumstances,
-        * e.g. KVM doesn't support offcore events.
-        * Check all extra_regs here.
-        */
-       if (x86_pmu.extra_regs) {
-               for (er = x86_pmu.extra_regs; er->msr; er++) {
-                       er->extra_msr_access = check_msr(er->msr, 0x11UL);
-                       /* Disable LBR select mapping */
-                       if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
-                               x86_pmu.lbr_sel_map = NULL;
-               }
-       }
-
-       /* Support full width counters using alternative MSR range */
-       if (x86_pmu.intel_cap.full_width_write) {
-               x86_pmu.max_period = x86_pmu.cntval_mask;
-               x86_pmu.perfctr = MSR_IA32_PMC0;
-               pr_cont("full-width counters, ");
-       }
-
-       return 0;
-}
-
-/*
- * HT bug: phase 2 init
- * Called once we have valid topology information to check
- * whether or not HT is enabled.
- * If HT is off, then we disable the workaround.
- */
-static __init int fixup_ht_bug(void)
-{
-       int cpu = smp_processor_id();
-       int w, c;
-       /*
-        * problem not present on this CPU model, nothing to do
-        */
-       if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
-               return 0;
-
-       w = cpumask_weight(topology_sibling_cpumask(cpu));
-       if (w > 1) {
-               pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
-               return 0;
-       }
-
-       if (lockup_detector_suspend() != 0) {
-               pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
-               return 0;
-       }
-
-       x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
-
-       x86_pmu.start_scheduling = NULL;
-       x86_pmu.commit_scheduling = NULL;
-       x86_pmu.stop_scheduling = NULL;
-
-       lockup_detector_resume();
-
-       get_online_cpus();
-
-       for_each_online_cpu(c) {
-               free_excl_cntrs(c);
-       }
-
-       put_online_cpus();
-       pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
-       return 0;
-}
-subsys_initcall(fixup_ht_bug)
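
Aside: the CPUID decoding at the top of intel_pmu_init() above copies leaf 0xA
fields into x86_pmu (version_id, num_counters, bit_width, mask_length and the
fixed-counter count). A minimal user-space sketch of that same bit layout
(illustrative only; the positions follow the cpuid10_eax/cpuid10_edx unions
used above):

/* Illustrative sketch: decode CPUID leaf 0xA the way intel_pmu_init() does. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 0xA not available */

	printf("arch perfmon version : %u\n",  eax        & 0xff);	/* version_id         */
	printf("general counters     : %u\n", (eax >>  8) & 0xff);	/* num_counters       */
	printf("counter width (bits) : %u\n", (eax >> 16) & 0xff);	/* bit_width          */
	printf("event mask length    : %u\n", (eax >> 24) & 0xff);	/* mask_length        */
	printf("fixed counters       : %u\n",  edx        & 0x1f);	/* num_counters_fixed */
	return 0;
}
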
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
deleted file mode 100644 (file)
index 2cad71d..0000000
+++ /dev/null
@@ -1,544 +0,0 @@
-/*
- * BTS PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/coredump.h>
-
-#include <asm-generic/sizes.h>
-#include <asm/perf_event.h>
-
-#include "perf_event.h"
-
-struct bts_ctx {
-       struct perf_output_handle       handle;
-       struct debug_store              ds_back;
-       int                             started;
-};
-
-static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
-
-#define BTS_RECORD_SIZE                24
-#define BTS_SAFETY_MARGIN      4080
-
-struct bts_phys {
-       struct page     *page;
-       unsigned long   size;
-       unsigned long   offset;
-       unsigned long   displacement;
-};
-
-struct bts_buffer {
-       size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
-       unsigned int    nr_pages;
-       unsigned int    nr_bufs;
-       unsigned int    cur_buf;
-       bool            snapshot;
-       local_t         data_size;
-       local_t         lost;
-       local_t         head;
-       unsigned long   end;
-       void            **data_pages;
-       struct bts_phys buf[0];
-};
-
-struct pmu bts_pmu;
-
-static size_t buf_size(struct page *page)
-{
-       return 1 << (PAGE_SHIFT + page_private(page));
-}
-
-static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
-{
-       struct bts_buffer *buf;
-       struct page *page;
-       int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
-       unsigned long offset;
-       size_t size = nr_pages << PAGE_SHIFT;
-       int pg, nbuf, pad;
-
-       /* count all the high order buffers */
-       for (pg = 0, nbuf = 0; pg < nr_pages;) {
-               page = virt_to_page(pages[pg]);
-               if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
-                       return NULL;
-               pg += 1 << page_private(page);
-               nbuf++;
-       }
-
-       /*
-        * to avoid interrupts in overwrite mode, only allow one physical buffer
-        */
-       if (overwrite && nbuf > 1)
-               return NULL;
-
-       buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
-       if (!buf)
-               return NULL;
-
-       buf->nr_pages = nr_pages;
-       buf->nr_bufs = nbuf;
-       buf->snapshot = overwrite;
-       buf->data_pages = pages;
-       buf->real_size = size - size % BTS_RECORD_SIZE;
-
-       for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
-               unsigned int __nr_pages;
-
-               page = virt_to_page(pages[pg]);
-               __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
-               buf->buf[nbuf].page = page;
-               buf->buf[nbuf].offset = offset;
-               buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
-               buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
-               pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
-               buf->buf[nbuf].size -= pad;
-
-               pg += __nr_pages;
-               offset += __nr_pages << PAGE_SHIFT;
-       }
-
-       return buf;
-}
-
-static void bts_buffer_free_aux(void *data)
-{
-       kfree(data);
-}
-
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
-{
-       return buf->buf[idx].offset + buf->buf[idx].displacement;
-}
-
-static void
-bts_config_buffer(struct bts_buffer *buf)
-{
-       int cpu = raw_smp_processor_id();
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       struct bts_phys *phys = &buf->buf[buf->cur_buf];
-       unsigned long index, thresh = 0, end = phys->size;
-       struct page *page = phys->page;
-
-       index = local_read(&buf->head);
-
-       if (!buf->snapshot) {
-               if (buf->end < phys->offset + buf_size(page))
-                       end = buf->end - phys->offset - phys->displacement;
-
-               index -= phys->offset + phys->displacement;
-
-               if (end - index > BTS_SAFETY_MARGIN)
-                       thresh = end - BTS_SAFETY_MARGIN;
-               else if (end - index > BTS_RECORD_SIZE)
-                       thresh = end - BTS_RECORD_SIZE;
-               else
-                       thresh = end;
-       }
-
-       ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
-       ds->bts_index = ds->bts_buffer_base + index;
-       ds->bts_absolute_maximum = ds->bts_buffer_base + end;
-       ds->bts_interrupt_threshold = !buf->snapshot
-               ? ds->bts_buffer_base + thresh
-               : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
-}
-
-static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
-{
-       unsigned long index = head - phys->offset;
-
-       memset(page_address(phys->page) + index, 0, phys->size - index);
-}
-
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-       if (buf->snapshot)
-               return false;
-
-       if (local_read(&buf->data_size) >= bts->handle.size ||
-           bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-               return true;
-
-       return false;
-}
-
-static void bts_update(struct bts_ctx *bts)
-{
-       int cpu = raw_smp_processor_id();
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-       unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
-
-       if (!buf)
-               return;
-
-       head = index + bts_buffer_offset(buf, buf->cur_buf);
-       old = local_xchg(&buf->head, head);
-
-       if (!buf->snapshot) {
-               if (old == head)
-                       return;
-
-               if (ds->bts_index >= ds->bts_absolute_maximum)
-                       local_inc(&buf->lost);
-
-               /*
-                * old and head are always in the same physical buffer, so we
-                * can subtract them to get the data size.
-                */
-               local_add(head - old, &buf->data_size);
-       } else {
-               local_set(&buf->data_size, head);
-       }
-}
-
-static void __bts_event_start(struct perf_event *event)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-       u64 config = 0;
-
-       if (!buf || bts_buffer_is_full(buf, bts))
-               return;
-
-       event->hw.itrace_started = 1;
-       event->hw.state = 0;
-
-       if (!buf->snapshot)
-               config |= ARCH_PERFMON_EVENTSEL_INT;
-       if (!event->attr.exclude_kernel)
-               config |= ARCH_PERFMON_EVENTSEL_OS;
-       if (!event->attr.exclude_user)
-               config |= ARCH_PERFMON_EVENTSEL_USR;
-
-       bts_config_buffer(buf);
-
-       /*
-        * local barrier to make sure that the ds configuration is visible
-        * before we enable BTS
-        */
-       wmb();
-
-       intel_pmu_enable_bts(config);
-}
-
-static void bts_event_start(struct perf_event *event, int flags)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       __bts_event_start(event);
-
-       /* PMI handler: this counter is running and likely generating PMIs */
-       ACCESS_ONCE(bts->started) = 1;
-}
-
-static void __bts_event_stop(struct perf_event *event)
-{
-       /*
-        * No extra synchronization is mandated by the documentation to have
-        * BTS data stores globally visible.
-        */
-       intel_pmu_disable_bts();
-
-       if (event->hw.state & PERF_HES_STOPPED)
-               return;
-
-       ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
-}
-
-static void bts_event_stop(struct perf_event *event, int flags)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       /* PMI handler: don't restart this counter */
-       ACCESS_ONCE(bts->started) = 0;
-
-       __bts_event_stop(event);
-
-       if (flags & PERF_EF_UPDATE)
-               bts_update(bts);
-}
-
-void intel_bts_enable_local(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       if (bts->handle.event && bts->started)
-               __bts_event_start(bts->handle.event);
-}
-
-void intel_bts_disable_local(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-
-       if (bts->handle.event)
-               __bts_event_stop(bts->handle.event);
-}
-
-static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
-{
-       unsigned long head, space, next_space, pad, gap, skip, wakeup;
-       unsigned int next_buf;
-       struct bts_phys *phys, *next_phys;
-       int ret;
-
-       if (buf->snapshot)
-               return 0;
-
-       head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-       if (WARN_ON_ONCE(head != local_read(&buf->head)))
-               return -EINVAL;
-
-       phys = &buf->buf[buf->cur_buf];
-       space = phys->offset + phys->displacement + phys->size - head;
-       pad = space;
-       if (space > handle->size) {
-               space = handle->size;
-               space -= space % BTS_RECORD_SIZE;
-       }
-       if (space <= BTS_SAFETY_MARGIN) {
-               /* See if next phys buffer has more space */
-               next_buf = buf->cur_buf + 1;
-               if (next_buf >= buf->nr_bufs)
-                       next_buf = 0;
-               next_phys = &buf->buf[next_buf];
-               gap = buf_size(phys->page) - phys->displacement - phys->size +
-                     next_phys->displacement;
-               skip = pad + gap;
-               if (handle->size >= skip) {
-                       next_space = next_phys->size;
-                       if (next_space + skip > handle->size) {
-                               next_space = handle->size - skip;
-                               next_space -= next_space % BTS_RECORD_SIZE;
-                       }
-                       if (next_space > space || !space) {
-                               if (pad)
-                                       bts_buffer_pad_out(phys, head);
-                               ret = perf_aux_output_skip(handle, skip);
-                               if (ret)
-                                       return ret;
-                               /* Advance to next phys buffer */
-                               phys = next_phys;
-                               space = next_space;
-                               head = phys->offset + phys->displacement;
-                               /*
-                                * After this, cur_buf and head won't match ds
-                                * anymore, so we must not be racing with
-                                * bts_update().
-                                */
-                               buf->cur_buf = next_buf;
-                               local_set(&buf->head, head);
-                       }
-               }
-       }
-
-       /* Don't go far beyond wakeup watermark */
-       wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
-                handle->head;
-       if (space > wakeup) {
-               space = wakeup;
-               space -= space % BTS_RECORD_SIZE;
-       }
-
-       buf->end = head + space;
-
-       /*
-        * If we have no space, the lost notification would have been sent when
-        * we hit absolute_maximum - see bts_update()
-        */
-       if (!space)
-               return -ENOSPC;
-
-       return 0;
-}
-
-int intel_bts_interrupt(void)
-{
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct perf_event *event = bts->handle.event;
-       struct bts_buffer *buf;
-       s64 old_head;
-       int err;
-
-       if (!event || !bts->started)
-               return 0;
-
-       buf = perf_get_aux(&bts->handle);
-       /*
-        * Skip snapshot counters: they don't use the interrupt, but
-        * there's no other way of telling, because the pointer will
-        * keep moving
-        */
-       if (!buf || buf->snapshot)
-               return 0;
-
-       old_head = local_read(&buf->head);
-       bts_update(bts);
-
-       /* no new data */
-       if (old_head == local_read(&buf->head))
-               return 0;
-
-       perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                           !!local_xchg(&buf->lost, 0));
-
-       buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return 1;
-
-       err = bts_buffer_reset(buf, &bts->handle);
-       if (err)
-               perf_aux_output_end(&bts->handle, 0, false);
-
-       return 1;
-}
-
-static void bts_event_del(struct perf_event *event, int mode)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
-       bts_event_stop(event, PERF_EF_UPDATE);
-
-       if (buf) {
-               if (buf->snapshot)
-                       bts->handle.head =
-                               local_xchg(&buf->data_size,
-                                          buf->nr_pages << PAGE_SHIFT);
-               perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-                                   !!local_xchg(&buf->lost, 0));
-       }
-
-       cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-       cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-       cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
-}
-
-static int bts_event_add(struct perf_event *event, int mode)
-{
-       struct bts_buffer *buf;
-       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       int ret = -EBUSY;
-
-       event->hw.state = PERF_HES_STOPPED;
-
-       if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
-               return -EBUSY;
-
-       if (bts->handle.event)
-               return -EBUSY;
-
-       buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return -EINVAL;
-
-       ret = bts_buffer_reset(buf, &bts->handle);
-       if (ret) {
-               perf_aux_output_end(&bts->handle, 0, false);
-               return ret;
-       }
-
-       bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-       bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-       bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
-       if (mode & PERF_EF_START) {
-               bts_event_start(event, 0);
-               if (hwc->state & PERF_HES_STOPPED) {
-                       bts_event_del(event, 0);
-                       return -EBUSY;
-               }
-       }
-
-       return 0;
-}
-
-static void bts_event_destroy(struct perf_event *event)
-{
-       x86_release_hardware();
-       x86_del_exclusive(x86_lbr_exclusive_bts);
-}
-
-static int bts_event_init(struct perf_event *event)
-{
-       int ret;
-
-       if (event->attr.type != bts_pmu.type)
-               return -ENOENT;
-
-       if (x86_add_exclusive(x86_lbr_exclusive_bts))
-               return -EBUSY;
-
-       /*
-        * BTS leaks kernel addresses even when CPL0 tracing is
-        * disabled, so disallow intel_bts driver for unprivileged
-        * users on paranoid systems since it provides trace data
-        * to the user in a zero-copy fashion.
-        *
-        * Note that the default paranoia setting permits unprivileged
-        * users to profile the kernel.
-        */
-       if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
-           !capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
-       ret = x86_reserve_hardware();
-       if (ret) {
-               x86_del_exclusive(x86_lbr_exclusive_bts);
-               return ret;
-       }
-
-       event->destroy = bts_event_destroy;
-
-       return 0;
-}
-
-static void bts_event_read(struct perf_event *event)
-{
-}
-
-static __init int bts_init(void)
-{
-       if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
-               return -ENODEV;
-
-       bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
-       bts_pmu.task_ctx_nr     = perf_sw_context;
-       bts_pmu.event_init      = bts_event_init;
-       bts_pmu.add             = bts_event_add;
-       bts_pmu.del             = bts_event_del;
-       bts_pmu.start           = bts_event_start;
-       bts_pmu.stop            = bts_event_stop;
-       bts_pmu.read            = bts_event_read;
-       bts_pmu.setup_aux       = bts_buffer_setup_aux;
-       bts_pmu.free_aux        = bts_buffer_free_aux;
-
-       return perf_pmu_register(&bts_pmu, "intel_bts", -1);
-}
-arch_initcall(bts_init);
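
Aside: bts_init() above follows the common pattern of filling in a struct pmu
and handing it to perf_pmu_register(). A stripped-down sketch of that
registration pattern (hypothetical "demo" PMU; the callback names are made up,
while struct pmu, perf_sw_context and perf_pmu_register() are the real
interfaces used above):

/* Minimal, hypothetical PMU skeleton mirroring the shape of bts_init(). */
#include <linux/module.h>
#include <linux/perf_event.h>

static int demo_event_init(struct perf_event *event)
{
	/* Only claim events created against our dynamically allocated type. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;
	return 0;
}

static int  demo_add(struct perf_event *event, int mode)    { return 0; }
static void demo_del(struct perf_event *event, int mode)    { }
static void demo_start(struct perf_event *event, int flags) { }
static void demo_stop(struct perf_event *event, int flags)  { }
static void demo_read(struct perf_event *event)             { }

static struct pmu demo_pmu = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= demo_event_init,
	.add		= demo_add,
	.del		= demo_del,
	.start		= demo_start,
	.stop		= demo_stop,
	.read		= demo_read,
};

static int __init demo_pmu_module_init(void)
{
	/* -1 asks perf to allocate a new PMU type id, as bts_init() does. */
	return perf_pmu_register(&demo_pmu, "demo_pmu", -1);
}
module_init(demo_pmu_module_init);
MODULE_LICENSE("GPL");
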
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
deleted file mode 100644 (file)
index a316ca9..0000000
+++ /dev/null
@@ -1,1391 +0,0 @@
-/*
- * Intel Cache Quality-of-Service Monitoring (CQM) support.
- *
- * Based very, very heavily on work by Peter Zijlstra.
- */
-
-#include <linux/perf_event.h>
-#include <linux/slab.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define MSR_IA32_PQR_ASSOC     0x0c8f
-#define MSR_IA32_QM_CTR                0x0c8e
-#define MSR_IA32_QM_EVTSEL     0x0c8d
-
-static u32 cqm_max_rmid = -1;
-static unsigned int cqm_l3_scale; /* supposedly cacheline size */
-
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:              The cached Resource Monitoring ID
- * @closid:            The cached Class Of Service ID
- * @rmid_usecnt:       The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
-       u32                     rmid;
-       u32                     closid;
-       int                     rmid_usecnt;
-};
-
-/*
- * The cached intel_pqr_state is strictly per CPU and can never be
- * updated from a remote CPU. Both functions which modify the state
- * (intel_cqm_event_start and intel_cqm_event_stop) are called with
- * interrupts disabled, which is sufficient for the protection.
- */
-static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
-
-/*
- * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
- * Also protects event->hw.cqm_rmid
- *
- * Hold either for stability, both for modification of ->hw.cqm_rmid.
- */
-static DEFINE_MUTEX(cache_mutex);
-static DEFINE_RAW_SPINLOCK(cache_lock);
-
-/*
- * Groups of events that have the same target(s), one RMID per group.
- */
-static LIST_HEAD(cache_groups);
-
-/*
- * Mask of CPUs for reading CQM values. We only need one CPU per socket.
- */
-static cpumask_t cqm_cpumask;
-
-#define RMID_VAL_ERROR         (1ULL << 63)
-#define RMID_VAL_UNAVAIL       (1ULL << 62)
-
-#define QOS_L3_OCCUP_EVENT_ID  (1 << 0)
-
-#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
-
-/*
- * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
- *
- * This rmid is always free and is guaranteed to have an associated
- * near-zero occupancy value, i.e. no cachelines are tagged with this
- * RMID, once __intel_cqm_rmid_rotate() returns.
- */
-static u32 intel_cqm_rotation_rmid;
-
-#define INVALID_RMID           (-1)
-
-/*
- * Is @rmid valid for programming the hardware?
- *
- * rmid 0 is reserved by the hardware for all non-monitored tasks, which
- * means that we should never come across an rmid with that value.
- * Likewise, an rmid value of -1 is used to indicate "no rmid currently
- * assigned" and is used as part of the rotation code.
- */
-static inline bool __rmid_valid(u32 rmid)
-{
-       if (!rmid || rmid == INVALID_RMID)
-               return false;
-
-       return true;
-}
-
-static u64 __rmid_read(u32 rmid)
-{
-       u64 val;
-
-       /*
-        * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
-        * it just says that to increase confusion.
-        */
-       wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
-       rdmsrl(MSR_IA32_QM_CTR, val);
-
-       /*
-        * Aside from the ERROR and UNAVAIL bits, assume this thing returns
-        * the number of cachelines tagged with @rmid.
-        */
-       return val;
-}
-
-enum rmid_recycle_state {
-       RMID_YOUNG = 0,
-       RMID_AVAILABLE,
-       RMID_DIRTY,
-};
-
-struct cqm_rmid_entry {
-       u32 rmid;
-       enum rmid_recycle_state state;
-       struct list_head list;
-       unsigned long queue_time;
-};
-
-/*
- * cqm_rmid_free_lru - A least recently used list of RMIDs.
- *
- * Oldest entry at the head, newest (most recently used) entry at the
- * tail. This list is never traversed, it's only used to keep track of
- * the lru order. That is, we only pick entries off the head or insert
- * them on the tail.
- *
- * All entries on the list are 'free', and their RMIDs are not currently
- * in use. To mark an RMID as in use, remove its entry from the lru
- * list.
- *
- *
- * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
- *
- * This list contains RMIDs that no one is currently using but that
- * may have a non-zero occupancy value associated with them. The
- * rotation worker moves RMIDs from the limbo list to the free list once
- * the occupancy value drops below __intel_cqm_threshold.
- *
- * Both lists are protected by cache_mutex.
- */
-static LIST_HEAD(cqm_rmid_free_lru);
-static LIST_HEAD(cqm_rmid_limbo_lru);
-
-/*
- * We use a simple array of pointers so that we can look up a struct
- * cqm_rmid_entry in O(1). This spares the callers of __get_rmid()
- * and __put_rmid() from having to worry about dealing with struct
- * cqm_rmid_entry - they just deal with rmids, i.e. integers.
- *
- * Once this array is initialized it is read-only. No locks are required
- * to access it.
- *
- * All entries for all RMIDs can be looked up in this array at all
- * times.
- */
-static struct cqm_rmid_entry **cqm_rmid_ptrs;
-
-static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
-{
-       struct cqm_rmid_entry *entry;
-
-       entry = cqm_rmid_ptrs[rmid];
-       WARN_ON(entry->rmid != rmid);
-
-       return entry;
-}
-
-/*
- * Returns INVALID_RMID on failure.
- *
- * We expect to be called with cache_mutex held.
- */
-static u32 __get_rmid(void)
-{
-       struct cqm_rmid_entry *entry;
-
-       lockdep_assert_held(&cache_mutex);
-
-       if (list_empty(&cqm_rmid_free_lru))
-               return INVALID_RMID;
-
-       entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
-       list_del(&entry->list);
-
-       return entry->rmid;
-}
-
-static void __put_rmid(u32 rmid)
-{
-       struct cqm_rmid_entry *entry;
-
-       lockdep_assert_held(&cache_mutex);
-
-       WARN_ON(!__rmid_valid(rmid));
-       entry = __rmid_entry(rmid);
-
-       entry->queue_time = jiffies;
-       entry->state = RMID_YOUNG;
-
-       list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
-}
-
-static int intel_cqm_setup_rmid_cache(void)
-{
-       struct cqm_rmid_entry *entry;
-       unsigned int nr_rmids;
-       int r = 0;
-
-       nr_rmids = cqm_max_rmid + 1;
-       cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) *
-                               nr_rmids, GFP_KERNEL);
-       if (!cqm_rmid_ptrs)
-               return -ENOMEM;
-
-       for (; r <= cqm_max_rmid; r++) {
-               struct cqm_rmid_entry *entry;
-
-               entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-               if (!entry)
-                       goto fail;
-
-               INIT_LIST_HEAD(&entry->list);
-               entry->rmid = r;
-               cqm_rmid_ptrs[r] = entry;
-
-               list_add_tail(&entry->list, &cqm_rmid_free_lru);
-       }
-
-       /*
-        * RMID 0 is special and is always allocated. It's used for all
-        * tasks that are not monitored.
-        */
-       entry = __rmid_entry(0);
-       list_del(&entry->list);
-
-       mutex_lock(&cache_mutex);
-       intel_cqm_rotation_rmid = __get_rmid();
-       mutex_unlock(&cache_mutex);
-
-       return 0;
-fail:
-       while (r--)
-               kfree(cqm_rmid_ptrs[r]);
-
-       kfree(cqm_rmid_ptrs);
-       return -ENOMEM;
-}
-
-/*
- * Determine if @a and @b measure the same set of tasks.
- *
- * If @a and @b measure the same set of tasks then we want to share a
- * single RMID.
- */
-static bool __match_event(struct perf_event *a, struct perf_event *b)
-{
-       /* Per-cpu and task events don't mix */
-       if ((a->attach_state & PERF_ATTACH_TASK) !=
-           (b->attach_state & PERF_ATTACH_TASK))
-               return false;
-
-#ifdef CONFIG_CGROUP_PERF
-       if (a->cgrp != b->cgrp)
-               return false;
-#endif
-
-       /* If not task event, we're machine wide */
-       if (!(b->attach_state & PERF_ATTACH_TASK))
-               return true;
-
-       /*
-        * Events that target the same task are placed into the same cache group.
-        */
-       if (a->hw.target == b->hw.target)
-               return true;
-
-       /*
-        * Are we an inherited event?
-        */
-       if (b->parent == a)
-               return true;
-
-       return false;
-}
-
-#ifdef CONFIG_CGROUP_PERF
-static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
-{
-       if (event->attach_state & PERF_ATTACH_TASK)
-               return perf_cgroup_from_task(event->hw.target, event->ctx);
-
-       return event->cgrp;
-}
-#endif
-
-/*
- * Determine if @a's tasks intersect with @b's tasks
- *
- * There are combinations of events that we explicitly prohibit,
- *
- *                PROHIBITS
- *     system-wide    ->       cgroup and task
- *     cgroup        ->        system-wide
- *                           ->        task in cgroup
- *     task          ->        system-wide
- *                           ->        task in cgroup
- *
- * Call this function before allocating an RMID.
- */
-static bool __conflict_event(struct perf_event *a, struct perf_event *b)
-{
-#ifdef CONFIG_CGROUP_PERF
-       /*
-        * We can have any number of cgroups but only one system-wide
-        * event at a time.
-        */
-       if (a->cgrp && b->cgrp) {
-               struct perf_cgroup *ac = a->cgrp;
-               struct perf_cgroup *bc = b->cgrp;
-
-               /*
-                * This condition should have been caught in
-                * __match_event() and we should be sharing an RMID.
-                */
-               WARN_ON_ONCE(ac == bc);
-
-               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-                       return true;
-
-               return false;
-       }
-
-       if (a->cgrp || b->cgrp) {
-               struct perf_cgroup *ac, *bc;
-
-               /*
-                * cgroup and system-wide events are mutually exclusive
-                */
-               if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
-                   (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
-                       return true;
-
-               /*
-                * Ensure neither event is part of the other's cgroup
-                */
-               ac = event_to_cgroup(a);
-               bc = event_to_cgroup(b);
-               if (ac == bc)
-                       return true;
-
-               /*
-                * Must have cgroup and non-intersecting task events.
-                */
-               if (!ac || !bc)
-                       return false;
-
-               /*
-                * We have cgroup and task events, and the task belongs
-                * to a cgroup. Check for overlap.
-                */
-               if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
-                   cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
-                       return true;
-
-               return false;
-       }
-#endif
-       /*
-        * If one of them is not a task, same story as above with cgroups.
-        */
-       if (!(a->attach_state & PERF_ATTACH_TASK) ||
-           !(b->attach_state & PERF_ATTACH_TASK))
-               return true;
-
-       /*
-        * Must be non-overlapping.
-        */
-       return false;
-}
-
-struct rmid_read {
-       u32 rmid;
-       atomic64_t value;
-};
-
-static void __intel_cqm_event_count(void *info);
-
-/*
- * Exchange the RMID of a group of events.
- */
-static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
-{
-       struct perf_event *event;
-       struct list_head *head = &group->hw.cqm_group_entry;
-       u32 old_rmid = group->hw.cqm_rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       /*
-        * If our RMID is being deallocated, perform a read now.
-        */
-       if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
-               struct rmid_read rr = {
-                       .value = ATOMIC64_INIT(0),
-                       .rmid = old_rmid,
-               };
-
-               on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
-                                &rr, 1);
-               local64_set(&group->count, atomic64_read(&rr.value));
-       }
-
-       raw_spin_lock_irq(&cache_lock);
-
-       group->hw.cqm_rmid = rmid;
-       list_for_each_entry(event, head, hw.cqm_group_entry)
-               event->hw.cqm_rmid = rmid;
-
-       raw_spin_unlock_irq(&cache_lock);
-
-       return old_rmid;
-}
-
-/*
- * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
- * cachelines are still tagged with RMIDs in limbo, we progressively
- * increment the threshold until we find an RMID in limbo with <=
- * __intel_cqm_threshold lines tagged. This is designed to mitigate the
- * problem where cachelines tagged with an RMID are not steadily being
- * evicted.
- *
- * On successful rotations we decrease the threshold back towards zero.
- *
- * __intel_cqm_max_threshold provides an upper bound on the threshold,
- * and is measured in bytes because it's exposed to userland.
- */
-static unsigned int __intel_cqm_threshold;
-static unsigned int __intel_cqm_max_threshold;
-
-/*
- * Test whether an RMID has a zero occupancy value on this cpu.
- */
-static void intel_cqm_stable(void *arg)
-{
-       struct cqm_rmid_entry *entry;
-
-       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-               if (entry->state != RMID_AVAILABLE)
-                       break;
-
-               if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
-                       entry->state = RMID_DIRTY;
-       }
-}
-
-/*
- * If we have group events waiting for an RMID that don't conflict with
- * events already running, assign @rmid.
- */
-static bool intel_cqm_sched_in_event(u32 rmid)
-{
-       struct perf_event *leader, *event;
-
-       lockdep_assert_held(&cache_mutex);
-
-       leader = list_first_entry(&cache_groups, struct perf_event,
-                                 hw.cqm_groups_entry);
-       event = leader;
-
-       list_for_each_entry_continue(event, &cache_groups,
-                                    hw.cqm_groups_entry) {
-               if (__rmid_valid(event->hw.cqm_rmid))
-                       continue;
-
-               if (__conflict_event(event, leader))
-                       continue;
-
-               intel_cqm_xchg_rmid(event, rmid);
-               return true;
-       }
-
-       return false;
-}
-
-/*
- * Initially use this constant for both the limbo queue time and the
- * rotation timer interval, pmu::hrtimer_interval_ms.
- *
- * They don't need to be the same, but the two are related since if you
- * rotate faster than you recycle RMIDs, you may run out of available
- * RMIDs.
- */
-#define RMID_DEFAULT_QUEUE_TIME 250    /* ms */
-
-static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
-
-/*
- * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
- * @available: number of freeable RMIDs on the limbo list
- *
- * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
- * cachelines are tagged with those RMIDs. After this we can reuse them
- * and know that the current set of active RMIDs is stable.
- *
- * Return %true or %false depending on whether stabilization needs to be
- * reattempted.
- *
- * If we return %true then @available is updated to indicate the
- * number of RMIDs on the limbo list that have been queued for the
- * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
- * are above __intel_cqm_threshold.
- */
-static bool intel_cqm_rmid_stabilize(unsigned int *available)
-{
-       struct cqm_rmid_entry *entry, *tmp;
-
-       lockdep_assert_held(&cache_mutex);
-
-       *available = 0;
-       list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
-               unsigned long min_queue_time;
-               unsigned long now = jiffies;
-
-               /*
-                * We hold RMIDs placed into limbo for a minimum queue
-                * time. Before the minimum queue time has elapsed we do
-                * not recycle RMIDs.
-                *
-                * The reasoning is that until a sufficient time has
-                * passed since we stopped using an RMID, any RMID
-                * placed onto the limbo list will likely still have
-                * data tagged in the cache, which means we'll probably
-                * fail to recycle it anyway.
-                *
-                * We can save ourselves an expensive IPI by skipping
-                * any RMIDs that have not been queued for the minimum
-                * time.
-                */
-               min_queue_time = entry->queue_time +
-                       msecs_to_jiffies(__rmid_queue_time_ms);
-
-               if (time_after(min_queue_time, now))
-                       break;
-
-               entry->state = RMID_AVAILABLE;
-               (*available)++;
-       }
-
-       /*
-        * Fast return if none of the RMIDs on the limbo list have been
-        * sitting on the queue for the minimum queue time.
-        */
-       if (!*available)
-               return false;
-
-       /*
-        * Test whether an RMID is free for each package.
-        */
-       on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
-
-       list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
-               /*
-                * Exhausted all RMIDs that have waited min queue time.
-                */
-               if (entry->state == RMID_YOUNG)
-                       break;
-
-               if (entry->state == RMID_DIRTY)
-                       continue;
-
-               list_del(&entry->list); /* remove from limbo */
-
-               /*
-                * The rotation RMID gets priority if it's
-                * currently invalid, in which case we skip adding
-                * the RMID to the free lru.
-                */
-               if (!__rmid_valid(intel_cqm_rotation_rmid)) {
-                       intel_cqm_rotation_rmid = entry->rmid;
-                       continue;
-               }
-
-               /*
-                * If we have groups waiting for RMIDs, hand
-                * them one now provided they don't conflict.
-                */
-               if (intel_cqm_sched_in_event(entry->rmid))
-                       continue;
-
-               /*
-                * Otherwise place it onto the free list.
-                */
-               list_add_tail(&entry->list, &cqm_rmid_free_lru);
-       }
-
-
-       return __rmid_valid(intel_cqm_rotation_rmid);
-}
-
-/*
- * Pick a victim group and move it to the tail of the group list.
- * @next: The first group without an RMID
- */
-static void __intel_cqm_pick_and_rotate(struct perf_event *next)
-{
-       struct perf_event *rotor;
-       u32 rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       rotor = list_first_entry(&cache_groups, struct perf_event,
-                                hw.cqm_groups_entry);
-
-       /*
-        * The group at the front of the list should always have a valid
-        * RMID. If it doesn't then no groups have RMIDs assigned and we
-        * don't need to rotate the list.
-        */
-       if (next == rotor)
-               return;
-
-       rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
-       __put_rmid(rmid);
-
-       list_rotate_left(&cache_groups);
-}
-
-/*
- * Deallocate the RMIDs from any events that conflict with @event, and
- * place them on the back of the group list.
- */
-static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
-{
-       struct perf_event *group, *g;
-       u32 rmid;
-
-       lockdep_assert_held(&cache_mutex);
-
-       list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
-               if (group == event)
-                       continue;
-
-               rmid = group->hw.cqm_rmid;
-
-               /*
-                * Skip events that don't have a valid RMID.
-                */
-               if (!__rmid_valid(rmid))
-                       continue;
-
-               /*
-                * No conflict? No problem! Leave the event alone.
-                */
-               if (!__conflict_event(group, event))
-                       continue;
-
-               intel_cqm_xchg_rmid(group, INVALID_RMID);
-               __put_rmid(rmid);
-       }
-}
-
-/*
- * Attempt to rotate the groups and assign new RMIDs.
- *
- * We rotate for two reasons,
- *   1. To handle the scheduling of conflicting events
- *   2. To recycle RMIDs
- *
- * Rotating RMIDs is complicated because the hardware doesn't give us
- * any clues.
- *
- * There are problems with the hardware interface; when you change the
- * task:RMID map, cachelines retain their 'old' tags, giving a skewed
- * picture. In order to work around this, we must always keep one free
- * RMID - intel_cqm_rotation_rmid.
- *
- * Rotation works by taking away an RMID from a group (the old RMID),
- * and assigning the free RMID to another group (the new RMID). We must
- * then wait for the old RMID to not be used (no cachelines tagged).
- * This ensures that all cachelines are tagged with 'active' RMIDs. At
- * this point we can start reading values for the new RMID and treat the
- * old RMID as the free RMID for the next rotation.
- *
- * Return %true or %false depending on whether we did any rotating.
- */
-static bool __intel_cqm_rmid_rotate(void)
-{
-       struct perf_event *group, *start = NULL;
-       unsigned int threshold_limit;
-       unsigned int nr_needed = 0;
-       unsigned int nr_available;
-       bool rotated = false;
-
-       mutex_lock(&cache_mutex);
-
-again:
-       /*
-        * Fast path through this function if there are no groups and no
-        * RMIDs that need cleaning.
-        */
-       if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
-               goto out;
-
-       list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
-               if (!__rmid_valid(group->hw.cqm_rmid)) {
-                       if (!start)
-                               start = group;
-                       nr_needed++;
-               }
-       }
-
-       /*
-        * We have some event groups, but they all have RMIDs assigned
-        * and no RMIDs need cleaning.
-        */
-       if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
-               goto out;
-
-       if (!nr_needed)
-               goto stabilize;
-
-       /*
-        * We have more event groups without RMIDs than available RMIDs,
-        * or we have event groups that conflict with the ones currently
-        * scheduled.
-        *
-        * We force deallocate the rmid of the group at the head of
-        * cache_groups. The first event group without an RMID then gets
-        * assigned intel_cqm_rotation_rmid. This ensures we always make
-        * forward progress.
-        *
-        * Rotate the cache_groups list so the previous head is now the
-        * tail.
-        */
-       __intel_cqm_pick_and_rotate(start);
-
-       /*
-        * If the rotation is going to succeed, reduce the threshold so
-        * that we don't needlessly reuse dirty RMIDs.
-        */
-       if (__rmid_valid(intel_cqm_rotation_rmid)) {
-               intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
-               intel_cqm_rotation_rmid = __get_rmid();
-
-               intel_cqm_sched_out_conflicting_events(start);
-
-               if (__intel_cqm_threshold)
-                       __intel_cqm_threshold--;
-       }
-
-       rotated = true;
-
-stabilize:
-       /*
-        * We now need to stabilize the RMID we freed above (if any) to
-        * ensure that the next time we rotate we have an RMID with zero
-        * occupancy value.
-        *
-        * Alternatively, if we didn't need to perform any rotation,
-        * we'll have a bunch of RMIDs in limbo that need stabilizing.
-        */
-       threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
-
-       while (intel_cqm_rmid_stabilize(&nr_available) &&
-              __intel_cqm_threshold < threshold_limit) {
-               unsigned int steal_limit;
-
-               /*
-                * Don't spin if nobody is actively waiting for an RMID;
-                * the rotation worker will be kicked as soon as an
-                * event needs an RMID anyway.
-                */
-               if (!nr_needed)
-                       break;
-
-               /* Allow max 25% of RMIDs to be in limbo. */
-               steal_limit = (cqm_max_rmid + 1) / 4;
-
-               /*
-                * We failed to stabilize any RMIDs so our rotation
-                * logic is now stuck. In order to make forward progress
-                * we have a few options:
-                *
-                *   1. rotate ("steal") another RMID
-                *   2. increase the threshold
-                *   3. do nothing
-                *
-                * We do both of 1. and 2. until we hit the steal limit.
-                *
-                * The steal limit prevents all RMIDs ending up on the
-                * limbo list. This can happen if every RMID has a
-                * non-zero occupancy above threshold_limit, and the
-                * occupancy values aren't dropping fast enough.
-                *
-                * Note that there is prioritisation at work here - we'd
-                * rather increase the number of RMIDs on the limbo list
-                * than increase the threshold, because increasing the
-                * threshold skews the event data (because we reuse
-                * dirty RMIDs) - threshold bumps are a last resort.
-                */
-               if (nr_available < steal_limit)
-                       goto again;
-
-               __intel_cqm_threshold++;
-       }
-
-out:
-       mutex_unlock(&cache_mutex);
-       return rotated;
-}
-
-static void intel_cqm_rmid_rotate(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
-
-static struct pmu intel_cqm_pmu;
-
-static void intel_cqm_rmid_rotate(struct work_struct *work)
-{
-       unsigned long delay;
-
-       __intel_cqm_rmid_rotate();
-
-       delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
-       schedule_delayed_work(&intel_cqm_rmid_work, delay);
-}
-
-/*
- * Find a group and set up the RMID.
- *
- * If we're part of a group, we use the group's RMID.
- */
-static void intel_cqm_setup_event(struct perf_event *event,
-                                 struct perf_event **group)
-{
-       struct perf_event *iter;
-       bool conflict = false;
-       u32 rmid;
-
-       list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
-               rmid = iter->hw.cqm_rmid;
-
-               if (__match_event(iter, event)) {
-                       /* All tasks in a group share an RMID */
-                       event->hw.cqm_rmid = rmid;
-                       *group = iter;
-                       return;
-               }
-
-               /*
-                * We only care about conflicts for events that are
-                * actually scheduled in (and hence have a valid RMID).
-                */
-               if (__conflict_event(iter, event) && __rmid_valid(rmid))
-                       conflict = true;
-       }
-
-       if (conflict)
-               rmid = INVALID_RMID;
-       else
-               rmid = __get_rmid();
-
-       event->hw.cqm_rmid = rmid;
-}
-
-static void intel_cqm_event_read(struct perf_event *event)
-{
-       unsigned long flags;
-       u32 rmid;
-       u64 val;
-
-       /*
-        * Task events are handled by intel_cqm_event_count().
-        */
-       if (event->cpu == -1)
-               return;
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       rmid = event->hw.cqm_rmid;
-
-       if (!__rmid_valid(rmid))
-               goto out;
-
-       val = __rmid_read(rmid);
-
-       /*
-        * Ignore this reading on error states and do not update the value.
-        */
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               goto out;
-
-       local64_set(&event->count, val);
-out:
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-static void __intel_cqm_event_count(void *info)
-{
-       struct rmid_read *rr = info;
-       u64 val;
-
-       val = __rmid_read(rr->rmid);
-
-       if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
-               return;
-
-       atomic64_add(val, &rr->value);
-}
-
-static inline bool cqm_group_leader(struct perf_event *event)
-{
-       return !list_empty(&event->hw.cqm_groups_entry);
-}
-
-static u64 intel_cqm_event_count(struct perf_event *event)
-{
-       unsigned long flags;
-       struct rmid_read rr = {
-               .value = ATOMIC64_INIT(0),
-       };
-
-       /*
-        * We only need to worry about task events. System-wide events
-        * are handled as usual, i.e. entirely with
-        * intel_cqm_event_read().
-        */
-       if (event->cpu != -1)
-               return __perf_event_count(event);
-
-       /*
-        * Only the group leader gets to report values. This stops us
-        * reporting duplicate values to userspace, and gives us a clear
-        * rule for which task gets to report the values.
-        *
-        * Note that it is impossible to attribute these values to
-        * specific packages - we forfeit that ability when we create
-        * task events.
-        */
-       if (!cqm_group_leader(event))
-               return 0;
-
-       /*
-        * Getting up-to-date values requires an SMP IPI which is not
-        * possible if we're being called in interrupt context. Return
-        * the cached values instead.
-        */
-       if (unlikely(in_interrupt()))
-               goto out;
-
-       /*
-        * Notice that we don't perform the reading of an RMID
-        * atomically, because we can't hold a spin lock across the
-        * IPIs.
-        *
-        * Speculatively perform the read, since @event might be
-        * assigned a different (possibly invalid) RMID while we're
-        * busy performing the IPI calls. It's therefore necessary to
-        * check @event's RMID afterwards, and if it has changed,
-        * discard the result of the read.
-        */
-       rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
-
-       if (!__rmid_valid(rr.rmid))
-               goto out;
-
-       on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-       if (event->hw.cqm_rmid == rr.rmid)
-               local64_set(&event->count, atomic64_read(&rr.value));
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-out:
-       return __perf_event_count(event);
-}
-
-static void intel_cqm_event_start(struct perf_event *event, int mode)
-{
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-       u32 rmid = event->hw.cqm_rmid;
-
-       if (!(event->hw.cqm_state & PERF_HES_STOPPED))
-               return;
-
-       event->hw.cqm_state &= ~PERF_HES_STOPPED;
-
-       if (state->rmid_usecnt++) {
-               if (!WARN_ON_ONCE(state->rmid != rmid))
-                       return;
-       } else {
-               WARN_ON_ONCE(state->rmid);
-       }
-
-       state->rmid = rmid;
-       wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
-}
-
-static void intel_cqm_event_stop(struct perf_event *event, int mode)
-{
-       struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
-
-       if (event->hw.cqm_state & PERF_HES_STOPPED)
-               return;
-
-       event->hw.cqm_state |= PERF_HES_STOPPED;
-
-       intel_cqm_event_read(event);
-
-       if (!--state->rmid_usecnt) {
-               state->rmid = 0;
-               wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
-       } else {
-               WARN_ON_ONCE(!state->rmid);
-       }
-}
-
-static int intel_cqm_event_add(struct perf_event *event, int mode)
-{
-       unsigned long flags;
-       u32 rmid;
-
-       raw_spin_lock_irqsave(&cache_lock, flags);
-
-       event->hw.cqm_state = PERF_HES_STOPPED;
-       rmid = event->hw.cqm_rmid;
-
-       if (__rmid_valid(rmid) && (mode & PERF_EF_START))
-               intel_cqm_event_start(event, mode);
-
-       raw_spin_unlock_irqrestore(&cache_lock, flags);
-
-       return 0;
-}
-
-static void intel_cqm_event_destroy(struct perf_event *event)
-{
-       struct perf_event *group_other = NULL;
-
-       mutex_lock(&cache_mutex);
-
-       /*
-        * If there's another event in this group...
-        */
-       if (!list_empty(&event->hw.cqm_group_entry)) {
-               group_other = list_first_entry(&event->hw.cqm_group_entry,
-                                              struct perf_event,
-                                              hw.cqm_group_entry);
-               list_del(&event->hw.cqm_group_entry);
-       }
-
-       /*
-        * And we're the group leader...
-        */
-       if (cqm_group_leader(event)) {
-               /*
-                * If there was a group_other, make that leader, otherwise
-                * destroy the group and return the RMID.
-                */
-               if (group_other) {
-                       list_replace(&event->hw.cqm_groups_entry,
-                                    &group_other->hw.cqm_groups_entry);
-               } else {
-                       u32 rmid = event->hw.cqm_rmid;
-
-                       if (__rmid_valid(rmid))
-                               __put_rmid(rmid);
-                       list_del(&event->hw.cqm_groups_entry);
-               }
-       }
-
-       mutex_unlock(&cache_mutex);
-}
-
-static int intel_cqm_event_init(struct perf_event *event)
-{
-       struct perf_event *group = NULL;
-       bool rotate = false;
-
-       if (event->attr.type != intel_cqm_pmu.type)
-               return -ENOENT;
-
-       if (event->attr.config & ~QOS_EVENT_MASK)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       INIT_LIST_HEAD(&event->hw.cqm_group_entry);
-       INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
-
-       event->destroy = intel_cqm_event_destroy;
-
-       mutex_lock(&cache_mutex);
-
-       /* Will also set rmid */
-       intel_cqm_setup_event(event, &group);
-
-       if (group) {
-               list_add_tail(&event->hw.cqm_group_entry,
-                             &group->hw.cqm_group_entry);
-       } else {
-               list_add_tail(&event->hw.cqm_groups_entry,
-                             &cache_groups);
-
-               /*
-                * All RMIDs are either in use or have recently been
-                * used. Kick the rotation worker to clean/free some.
-                *
-                * We only do this for the group leader, rather than for
-                * every event in a group to save on needless work.
-                */
-               if (!__rmid_valid(event->hw.cqm_rmid))
-                       rotate = true;
-       }
-
-       mutex_unlock(&cache_mutex);
-
-       if (rotate)
-               schedule_delayed_work(&intel_cqm_rmid_work, 0);
-
-       return 0;
-}
-
-EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
-EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
-EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
-EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
-EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
-
-static struct attribute *intel_cqm_events_attr[] = {
-       EVENT_PTR(intel_cqm_llc),
-       EVENT_PTR(intel_cqm_llc_pkg),
-       EVENT_PTR(intel_cqm_llc_unit),
-       EVENT_PTR(intel_cqm_llc_scale),
-       EVENT_PTR(intel_cqm_llc_snapshot),
-       NULL,
-};
-
-static struct attribute_group intel_cqm_events_group = {
-       .name = "events",
-       .attrs = intel_cqm_events_attr,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-7");
-static struct attribute *intel_cqm_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group intel_cqm_format_group = {
-       .name = "format",
-       .attrs = intel_cqm_formats_attr,
-};
-
-static ssize_t
-max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
-                          char *page)
-{
-       ssize_t rv;
-
-       mutex_lock(&cache_mutex);
-       rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
-       mutex_unlock(&cache_mutex);
-
-       return rv;
-}
-
-static ssize_t
-max_recycle_threshold_store(struct device *dev,
-                           struct device_attribute *attr,
-                           const char *buf, size_t count)
-{
-       unsigned int bytes, cachelines;
-       int ret;
-
-       ret = kstrtouint(buf, 0, &bytes);
-       if (ret)
-               return ret;
-
-       mutex_lock(&cache_mutex);
-
-       __intel_cqm_max_threshold = bytes;
-       cachelines = bytes / cqm_l3_scale;
-
-       /*
-        * The new maximum takes effect immediately.
-        */
-       if (__intel_cqm_threshold > cachelines)
-               __intel_cqm_threshold = cachelines;
-
-       mutex_unlock(&cache_mutex);
-
-       return count;
-}
-
-static DEVICE_ATTR_RW(max_recycle_threshold);
-
-static struct attribute *intel_cqm_attrs[] = {
-       &dev_attr_max_recycle_threshold.attr,
-       NULL,
-};
-
-static const struct attribute_group intel_cqm_group = {
-       .attrs = intel_cqm_attrs,
-};
-
-static const struct attribute_group *intel_cqm_attr_groups[] = {
-       &intel_cqm_events_group,
-       &intel_cqm_format_group,
-       &intel_cqm_group,
-       NULL,
-};
-
-static struct pmu intel_cqm_pmu = {
-       .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
-       .attr_groups         = intel_cqm_attr_groups,
-       .task_ctx_nr         = perf_sw_context,
-       .event_init          = intel_cqm_event_init,
-       .add                 = intel_cqm_event_add,
-       .del                 = intel_cqm_event_stop,
-       .start               = intel_cqm_event_start,
-       .stop                = intel_cqm_event_stop,
-       .read                = intel_cqm_event_read,
-       .count               = intel_cqm_event_count,
-};
-
-static inline void cqm_pick_event_reader(int cpu)
-{
-       int phys_id = topology_physical_package_id(cpu);
-       int i;
-
-       for_each_cpu(i, &cqm_cpumask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return; /* already got reader for this socket */
-       }
-
-       cpumask_set_cpu(cpu, &cqm_cpumask);
-}
-
-static void intel_cqm_cpu_starting(unsigned int cpu)
-{
-       struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
-       struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-       state->rmid = 0;
-       state->closid = 0;
-       state->rmid_usecnt = 0;
-
-       WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
-       WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
-}
-
-static void intel_cqm_cpu_exit(unsigned int cpu)
-{
-       int phys_id = topology_physical_package_id(cpu);
-       int i;
-
-       /*
-        * Is @cpu a designated cqm reader?
-        */
-       if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
-               return;
-
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-
-               if (phys_id == topology_physical_package_id(i)) {
-                       cpumask_set_cpu(i, &cqm_cpumask);
-                       break;
-               }
-       }
-}
-
-static int intel_cqm_cpu_notifier(struct notifier_block *nb,
-                                 unsigned long action, void *hcpu)
-{
-       unsigned int cpu  = (unsigned long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_PREPARE:
-               intel_cqm_cpu_exit(cpu);
-               break;
-       case CPU_STARTING:
-               intel_cqm_cpu_starting(cpu);
-               cqm_pick_event_reader(cpu);
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static const struct x86_cpu_id intel_cqm_match[] = {
-       { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
-       {}
-};
-
-static int __init intel_cqm_init(void)
-{
-       char *str, scale[20];
-       int i, cpu, ret;
-
-       if (!x86_match_cpu(intel_cqm_match))
-               return -ENODEV;
-
-       cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
-
-       /*
-        * It's possible that not all resources support the same number
-        * of RMIDs. Instead of making scheduling much more complicated
-        * (where we have to match a task's RMID to a cpu that supports
-        * that many RMIDs), just use the minimum number of RMIDs
-        * supported across all cpus.
-        *
-        * Also, check that the scales match on all cpus.
-        */
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-               if (c->x86_cache_max_rmid < cqm_max_rmid)
-                       cqm_max_rmid = c->x86_cache_max_rmid;
-
-               if (c->x86_cache_occ_scale != cqm_l3_scale) {
-                       pr_err("Multiple LLC scale values, disabling\n");
-                       ret = -EINVAL;
-                       goto out;
-               }
-       }
-
-       /*
-        * A reasonable upper limit on the max threshold is the number
-        * of lines tagged per RMID if all RMIDs have the same number of
-        * lines tagged in the LLC.
-        *
-        * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
-        */
-       __intel_cqm_max_threshold =
-               boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
-
-       snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
-       str = kstrdup(scale, GFP_KERNEL);
-       if (!str) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       event_attr_intel_cqm_llc_scale.event_str = str;
-
-       ret = intel_cqm_setup_rmid_cache();
-       if (ret)
-               goto out;
-
-       for_each_online_cpu(i) {
-               intel_cqm_cpu_starting(i);
-               cqm_pick_event_reader(i);
-       }
-
-       __perf_cpu_notifier(intel_cqm_cpu_notifier);
-
-       ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
-       if (ret)
-               pr_err("Intel CQM perf registration failed: %d\n", ret);
-       else
-               pr_info("Intel CQM monitoring enabled\n");
-
-out:
-       cpu_notifier_register_done();
-
-       return ret;
-}
-device_initcall(intel_cqm_init);
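
The default __intel_cqm_max_threshold above simply divides the LLC size evenly across all RMIDs. A minimal standalone sketch of that arithmetic (a hypothetical helper, not taken from the deleted file; x86_cache_size is in kilobytes, hence the multiplication by 1024):

/*
 * Bytes of LLC per RMID, used as the default upper bound on the
 * recycling threshold. For a 35MB LLC (35840 KB) and 56 RMIDs:
 * 35840 * 1024 / 56 = 655360 bytes, i.e. ~1.8% of the LLC, matching
 * the figure quoted in intel_cqm_init().
 */
static unsigned int cqm_default_max_threshold(unsigned int llc_kbytes,
					      unsigned int nr_rmids)
{
	return llc_kbytes * 1024 / nr_rmids;
}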
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c
deleted file mode 100644 (file)
index 75a38b5..0000000
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * perf_event_intel_cstate.c: support cstate residency counters
- *
- * Copyright (C) 2015, Intel Corp.
- * Author: Kan Liang (kan.liang@intel.com)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- */
-
-/*
- * This file exports cstate-related free running (read-only) counters
- * for perf. These counters may be used simultaneously by other tools,
- * such as turbostat. However, it still makes sense to implement them
- * in perf, because we can conveniently collect them together with
- * other events, and they can be used from tools without special MSR
- * access code.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it is not supported by the hardware.
- *
- * According to counters' scope and category, two PMUs are registered
- * with the perf_event core subsystem.
- *  - 'cstate_core': The counter is available for each physical core.
- *    The counters include CORE_C*_RESIDENCY.
- *  - 'cstate_pkg': The counter is available for each physical package.
- *    The counters include PKG_C*_RESIDENCY.
- *
- * All of these counters are specified in the Intel® 64 and IA-32
- * Architectures Software Developer's Manual Vol3b.
- *
- * Model specific counters:
- *     MSR_CORE_C1_RES: CORE C1 Residency Counter
- *                      perf code: 0x00
- *                      Available model: SLM,AMT
- *                      Scope: Core (each processor core has a MSR)
- *     MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
- *                            perf code: 0x01
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
- *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
- *                            perf code: 0x03
- *                            Available model: SNB,IVB,HSW,BDW,SKL
- *                            Scope: Core
- *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
- *                            perf code: 0x00
- *                            Available model: SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
- *                            perf code: 0x01
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
- *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
- *                            perf code: 0x03
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
- *                            Scope: Package (physical package)
- *     MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
- *                            perf code: 0x04
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *     MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
- *                            perf code: 0x05
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *     MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
- *                            perf code: 0x06
- *                            Available model: HSW ULT only
- *                            Scope: Package (physical package)
- *
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)                \
-static ssize_t __cstate_##_var##_show(struct kobject *kobj,    \
-                               struct kobj_attribute *attr,    \
-                               char *page)                     \
-{                                                              \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
-       return sprintf(page, _format "\n");                     \
-}                                                              \
-static struct kobj_attribute format_attr_##_var =              \
-       __ATTR(_name, 0444, __cstate_##_var##_show, NULL)
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buf);
-
-struct perf_cstate_msr {
-       u64     msr;
-       struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
-};
-
-
-/* cstate_core PMU */
-
-static struct pmu cstate_core_pmu;
-static bool has_cstate_core;
-
-enum perf_cstate_core_id {
-       /*
-        * cstate_core events
-        */
-       PERF_CSTATE_CORE_C1_RES = 0,
-       PERF_CSTATE_CORE_C3_RES,
-       PERF_CSTATE_CORE_C6_RES,
-       PERF_CSTATE_CORE_C7_RES,
-
-       PERF_CSTATE_CORE_EVENT_MAX,
-};
-
-bool test_core(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_CORE_C3_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES ||
-                   idx == PERF_CSTATE_CORE_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_CORE_C1_RES ||
-                   idx == PERF_CSTATE_CORE_C6_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
-
-static struct perf_cstate_msr core_msr[] = {
-       [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,          &evattr_cstate_core_c1, test_core, },
-       [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,    &evattr_cstate_core_c3, test_core, },
-       [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,    &evattr_cstate_core_c6, test_core, },
-       [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,    &evattr_cstate_core_c7, test_core, },
-};
-
-static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group core_events_attr_group = {
-       .name = "events",
-       .attrs = core_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
-static struct attribute *core_format_attrs[] = {
-       &format_attr_core_event.attr,
-       NULL,
-};
-
-static struct attribute_group core_format_attr_group = {
-       .name = "format",
-       .attrs = core_format_attrs,
-};
-
-static cpumask_t cstate_core_cpu_mask;
-static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);
-
-static struct attribute *cstate_cpumask_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group cpumask_attr_group = {
-       .attrs = cstate_cpumask_attrs,
-};
-
-static const struct attribute_group *core_attr_groups[] = {
-       &core_events_attr_group,
-       &core_format_attr_group,
-       &cpumask_attr_group,
-       NULL,
-};
-
-/* cstate_core PMU end */
-
-
-/* cstate_pkg PMU */
-
-static struct pmu cstate_pkg_pmu;
-static bool has_cstate_pkg;
-
-enum perf_cstate_pkg_id {
-       /*
-        * cstate_pkg events
-        */
-       PERF_CSTATE_PKG_C2_RES = 0,
-       PERF_CSTATE_PKG_C3_RES,
-       PERF_CSTATE_PKG_C6_RES,
-       PERF_CSTATE_PKG_C7_RES,
-       PERF_CSTATE_PKG_C8_RES,
-       PERF_CSTATE_PKG_C9_RES,
-       PERF_CSTATE_PKG_C10_RES,
-
-       PERF_CSTATE_PKG_EVENT_MAX,
-};
-
-bool test_pkg(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-               if (idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES)
-                       return true;
-               break;
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_CSTATE_PKG_C6_RES)
-                       return true;
-               break;
-       case 69: /* 22nm Haswell ULT */
-               if (idx == PERF_CSTATE_PKG_C2_RES ||
-                   idx == PERF_CSTATE_PKG_C3_RES ||
-                   idx == PERF_CSTATE_PKG_C6_RES ||
-                   idx == PERF_CSTATE_PKG_C7_RES ||
-                   idx == PERF_CSTATE_PKG_C8_RES ||
-                   idx == PERF_CSTATE_PKG_C9_RES ||
-                   idx == PERF_CSTATE_PKG_C10_RES)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
-PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
-PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
-PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03");
-PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04");
-PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
-PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
-
-static struct perf_cstate_msr pkg_msr[] = {
-       [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,      &evattr_cstate_pkg_c2,  test_pkg, },
-       [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,      &evattr_cstate_pkg_c3,  test_pkg, },
-       [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,      &evattr_cstate_pkg_c6,  test_pkg, },
-       [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,      &evattr_cstate_pkg_c7,  test_pkg, },
-       [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,      &evattr_cstate_pkg_c8,  test_pkg, },
-       [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,      &evattr_cstate_pkg_c9,  test_pkg, },
-       [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,    &evattr_cstate_pkg_c10, test_pkg, },
-};
-
-static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group pkg_events_attr_group = {
-       .name = "events",
-       .attrs = pkg_events_attrs,
-};
-
-DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
-static struct attribute *pkg_format_attrs[] = {
-       &format_attr_pkg_event.attr,
-       NULL,
-};
-static struct attribute_group pkg_format_attr_group = {
-       .name = "format",
-       .attrs = pkg_format_attrs,
-};
-
-static cpumask_t cstate_pkg_cpu_mask;
-
-static const struct attribute_group *pkg_attr_groups[] = {
-       &pkg_events_attr_group,
-       &pkg_format_attr_group,
-       &cpumask_attr_group,
-       NULL,
-};
-
-/* cstate_pkg PMU end */
-
-static ssize_t cstate_get_attr_cpumask(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buf)
-{
-       struct pmu *pmu = dev_get_drvdata(dev);
-
-       if (pmu == &cstate_core_pmu)
-               return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
-       else if (pmu == &cstate_pkg_pmu)
-               return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
-       else
-               return 0;
-}
-
-static int cstate_pmu_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config;
-       int ret = 0;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       if (event->pmu == &cstate_core_pmu) {
-               if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
-                       return -EINVAL;
-               if (!core_msr[cfg].attr)
-                       return -EINVAL;
-               event->hw.event_base = core_msr[cfg].msr;
-       } else if (event->pmu == &cstate_pkg_pmu) {
-               if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
-                       return -EINVAL;
-               if (!pkg_msr[cfg].attr)
-                       return -EINVAL;
-               event->hw.event_base = pkg_msr[cfg].msr;
-       } else
-               return -ENOENT;
-
-       /* must be done before validate_group */
-       event->hw.config = cfg;
-       event->hw.idx = -1;
-
-       return ret;
-}
-
-static inline u64 cstate_pmu_read_counter(struct perf_event *event)
-{
-       u64 val;
-
-       rdmsrl(event->hw.event_base, val);
-       return val;
-}
-
-static void cstate_pmu_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev_raw_count, new_raw_count;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       new_raw_count = cstate_pmu_read_counter(event);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                           new_raw_count) != prev_raw_count)
-               goto again;
-
-       local64_add(new_raw_count - prev_raw_count, &event->count);
-}
-
-static void cstate_pmu_event_start(struct perf_event *event, int mode)
-{
-       local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
-}
-
-static void cstate_pmu_event_stop(struct perf_event *event, int mode)
-{
-       cstate_pmu_event_update(event);
-}
-
-static void cstate_pmu_event_del(struct perf_event *event, int mode)
-{
-       cstate_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int cstate_pmu_event_add(struct perf_event *event, int mode)
-{
-       if (mode & PERF_EF_START)
-               cstate_pmu_event_start(event, mode);
-
-       return 0;
-}
-
-static void cstate_cpu_exit(int cpu)
-{
-       int i, id, target;
-
-       /* cpu exit for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_core_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
-                       cpumask_set_cpu(target, &cstate_core_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-               if (target >= 0)
-                       perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
-       }
-
-       /* cpu exit for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               target = -1;
-
-               for_each_online_cpu(i) {
-                       if (i == cpu)
-                               continue;
-                       if (id == topology_physical_package_id(i)) {
-                               target = i;
-                               break;
-                       }
-               }
-               if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
-                       cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-               WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-               if (target >= 0)
-                       perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
-       }
-}
-
-static void cstate_cpu_init(int cpu)
-{
-       int i, id;
-
-       /* cpu init for cstate core */
-       if (has_cstate_core) {
-               id = topology_core_id(cpu);
-               for_each_cpu(i, &cstate_core_cpu_mask) {
-                       if (id == topology_core_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-       }
-
-       /* cpu init for cstate pkg */
-       if (has_cstate_pkg) {
-               id = topology_physical_package_id(cpu);
-               for_each_cpu(i, &cstate_pkg_cpu_mask) {
-                       if (id == topology_physical_package_id(i))
-                               break;
-               }
-               if (i >= nr_cpu_ids)
-                       cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-       }
-}
-
-static int cstate_cpu_notifier(struct notifier_block *self,
-                                 unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               break;
-       case CPU_STARTING:
-               cstate_cpu_init(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               break;
-       case CPU_DOWN_PREPARE:
-               cstate_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-/*
- * Probe the cstate events and insert the available ones into the sysfs attrs.
- * Return false if there are no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-                            struct attribute   **events_attrs,
-                            int max_event_nr)
-{
-       int i, j = 0;
-       u64 val;
-
-       /* Probe the cstate events. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining events in the sysfs attrs. */
-       for (i = 0; i < max_event_nr; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-       /* SLM has different MSR for PKG C6 */
-       switch (boot_cpu_data.x86_model) {
-       case 55:
-       case 76:
-       case 77:
-               pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-       }
-
-       if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-               has_cstate_core = true;
-
-       if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-               has_cstate_pkg = true;
-
-       return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-       int cpu;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
-
-       __perf_cpu_notifier(cstate_cpu_notifier);
-
-       cpu_notifier_register_done();
-}
-
-static struct pmu cstate_core_pmu = {
-       .attr_groups    = core_attr_groups,
-       .name           = "cstate_core",
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
-       .start          = cstate_pmu_event_start,
-       .stop           = cstate_pmu_event_stop,
-       .read           = cstate_pmu_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static struct pmu cstate_pkg_pmu = {
-       .attr_groups    = pkg_attr_groups,
-       .name           = "cstate_pkg",
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = cstate_pmu_event_init,
-       .add            = cstate_pmu_event_add, /* must have */
-       .del            = cstate_pmu_event_del, /* must have */
-       .start          = cstate_pmu_event_start,
-       .stop           = cstate_pmu_event_stop,
-       .read           = cstate_pmu_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static void __init cstate_pmus_register(void)
-{
-       int err;
-
-       if (has_cstate_core) {
-               err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_core_pmu.name, err);
-       }
-
-       if (has_cstate_pkg) {
-               err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-               if (WARN_ON(err))
-                       pr_info("Failed to register PMU %s error %d\n",
-                               cstate_pkg_pmu.name, err);
-       }
-}
-
-static int __init cstate_pmu_init(void)
-{
-       int err;
-
-       if (cpu_has_hypervisor)
-               return -ENODEV;
-
-       err = cstate_init();
-       if (err)
-               return err;
-
-       cstate_cpumask_init();
-
-       cstate_pmus_register();
-
-       return 0;
-}
-
-device_initcall(cstate_pmu_init);
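
Since the cstate events above support only system-wide counting, a userspace consumer opens them with pid == -1 on a specific CPU. Below is a minimal sketch (a hypothetical example program, not part of the kernel sources) that counts cstate_core/c6-residency/ (perf code 0x02 in core_msr[] above) on CPU 0, reading the dynamically assigned PMU type from the sysfs node created by perf_pmu_register():

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	FILE *f;
	int fd;

	/* The PMU type id is exported at registration time. */
	f = fopen("/sys/bus/event_source/devices/cstate_core/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0x02;	/* c6-residency, see core_msr[] */

	/* System-wide counting: pid == -1, one specific cpu. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;

	printf("core C6 residency accumulated over ~1s: %llu\n",
	       (unsigned long long)count);
	close(fd);
	return 0;
}

The same flow works for the cstate_pkg PMU; only the sysfs directory name and the config value from pkg_msr[] change.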
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
deleted file mode 100644 (file)
index 10602f0..0000000
+++ /dev/null
@@ -1,1368 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE                24
-
-#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
-#define PEBS_FIXUP_SIZE                PAGE_SIZE
-
-/*
- * pebs_record_32 for p4 and core not supported
-
-struct pebs_record_32 {
-       u32 flags, ip;
-       u32 ax, bc, cx, dx;
-       u32 si, di, bp, sp;
-};
-
- */
-
-union intel_x86_pebs_dse {
-       u64 val;
-       struct {
-               unsigned int ld_dse:4;
-               unsigned int ld_stlb_miss:1;
-               unsigned int ld_locked:1;
-               unsigned int ld_reserved:26;
-       };
-       struct {
-               unsigned int st_l1d_hit:1;
-               unsigned int st_reserved1:3;
-               unsigned int st_stlb_miss:1;
-               unsigned int st_locked:1;
-               unsigned int st_reserved2:26;
-       };
-};
-
-
-/*
- * Map PEBS Load Latency Data Source encodings to generic
- * memory data source information
- */
-#define P(a, b) PERF_MEM_S(a, b)
-#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
-#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
-
-static const u64 pebs_data_source[] = {
-       P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
-       OP_LH | P(LVL, L1)  | P(SNOOP, NONE),   /* 0x01: L1 local */
-       OP_LH | P(LVL, LFB) | P(SNOOP, NONE),   /* 0x02: LFB hit */
-       OP_LH | P(LVL, L2)  | P(SNOOP, NONE),   /* 0x03: L2 hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, NONE),   /* 0x04: L3 hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, MISS),   /* 0x05: L3 hit, snoop miss */
-       OP_LH | P(LVL, L3)  | P(SNOOP, HIT),    /* 0x06: L3 hit, snoop hit */
-       OP_LH | P(LVL, L3)  | P(SNOOP, HITM),   /* 0x07: L3 hit, snoop hitm */
-       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT),  /* 0x08: L3 miss snoop hit */
-       OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
-       OP_LH | P(LVL, LOC_RAM)  | P(SNOOP, HIT),  /* 0x0a: L3 miss, shared */
-       OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT),  /* 0x0b: L3 miss, shared */
-       OP_LH | P(LVL, LOC_RAM)  | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
-       OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
-       OP_LH | P(LVL, IO)  | P(SNOOP, NONE), /* 0x0e: I/O */
-       OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
-};
-
-static u64 precise_store_data(u64 status)
-{
-       union intel_x86_pebs_dse dse;
-       u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
-
-       dse.val = status;
-
-       /*
-        * bit 4: TLB access
-        * 1 = store missed the 2nd level TLB
-        *
-        * so the translation was satisfied by the walker or the OS;
-        * otherwise it hit the 2nd level TLB
-        */
-       if (dse.st_stlb_miss)
-               val |= P(TLB, MISS);
-       else
-               val |= P(TLB, HIT);
-
-       /*
-        * bit 0: hit L1 data cache
-        * if not set, then all we know is that
-        * it missed L1D
-        */
-       if (dse.st_l1d_hit)
-               val |= P(LVL, HIT);
-       else
-               val |= P(LVL, MISS);
-
-       /*
-        * bit 5: Locked prefix
-        */
-       if (dse.st_locked)
-               val |= P(LOCK, LOCKED);
-
-       return val;
-}
-
-static u64 precise_datala_hsw(struct perf_event *event, u64 status)
-{
-       union perf_mem_data_src dse;
-
-       dse.val = PERF_MEM_NA;
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
-               dse.mem_op = PERF_MEM_OP_STORE;
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW)
-               dse.mem_op = PERF_MEM_OP_LOAD;
-
-       /*
-        * L1 info only valid for following events:
-        *
-        * MEM_UOPS_RETIRED.STLB_MISS_STORES
-        * MEM_UOPS_RETIRED.LOCK_STORES
-        * MEM_UOPS_RETIRED.SPLIT_STORES
-        * MEM_UOPS_RETIRED.ALL_STORES
-        */
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) {
-               if (status & 1)
-                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-               else
-                       dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
-       }
-       return dse.val;
-}
-
-static u64 load_latency_data(u64 status)
-{
-       union intel_x86_pebs_dse dse;
-       u64 val;
-       int model = boot_cpu_data.x86_model;
-       int fam = boot_cpu_data.x86;
-
-       dse.val = status;
-
-       /*
-        * use the mapping table for bits 0-3
-        */
-       val = pebs_data_source[dse.ld_dse];
-
-       /*
-        * Nehalem models do not support TLB or Lock info
-        */
-       if (fam == 0x6 && (model == 26 || model == 30
-           || model == 31 || model == 46)) {
-               val |= P(TLB, NA) | P(LOCK, NA);
-               return val;
-       }
-       /*
-        * bit 4: TLB access
-        * 0 = did not miss 2nd level TLB
-        * 1 = missed 2nd level TLB
-        */
-       if (dse.ld_stlb_miss)
-               val |= P(TLB, MISS) | P(TLB, L2);
-       else
-               val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
-       /*
-        * bit 5: locked prefix
-        */
-       if (dse.ld_locked)
-               val |= P(LOCK, LOCKED);
-
-       return val;
-}
-
-struct pebs_record_core {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-};
-
-struct pebs_record_nhm {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-};
-
-/*
- * Same as pebs_record_nhm, with two additional fields.
- */
-struct pebs_record_hsw {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-       u64 real_ip, tsx_tuning;
-};
-
-union hsw_tsx_tuning {
-       struct {
-               u32 cycles_last_block     : 32,
-                   hle_abort             : 1,
-                   rtm_abort             : 1,
-                   instruction_abort     : 1,
-                   non_instruction_abort : 1,
-                   retry                 : 1,
-                   data_conflict         : 1,
-                   capacity_writes       : 1,
-                   capacity_reads        : 1;
-       };
-       u64         value;
-};
-
-#define PEBS_HSW_TSX_FLAGS     0xff00000000ULL
-
-/* Same as HSW, plus TSC */
-
-struct pebs_record_skl {
-       u64 flags, ip;
-       u64 ax, bx, cx, dx;
-       u64 si, di, bp, sp;
-       u64 r8,  r9,  r10, r11;
-       u64 r12, r13, r14, r15;
-       u64 status, dla, dse, lat;
-       u64 real_ip, tsx_tuning;
-       u64 tsc;
-};
-
-void init_debug_store_on_cpu(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
-       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-                    (u32)((u64)(unsigned long)ds),
-                    (u32)((u64)(unsigned long)ds >> 32));
-}
-
-void fini_debug_store_on_cpu(int cpu)
-{
-       if (!per_cpu(cpu_hw_events, cpu).ds)
-               return;
-
-       wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static DEFINE_PER_CPU(void *, insn_buffer);
-
-static int alloc_pebs_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max;
-       void *buffer, *ibuffer;
-
-       if (!x86_pmu.pebs)
-               return 0;
-
-       buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
-       if (unlikely(!buffer))
-               return -ENOMEM;
-
-       /*
-        * HSW+ already provides us the eventing ip; no need to allocate this
-        * buffer then.
-        */
-       if (x86_pmu.intel_cap.pebs_format < 2) {
-               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
-               if (!ibuffer) {
-                       kfree(buffer);
-                       return -ENOMEM;
-               }
-               per_cpu(insn_buffer, cpu) = ibuffer;
-       }
-
-       max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-       ds->pebs_index = ds->pebs_buffer_base;
-       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-               max * x86_pmu.pebs_record_size;
-
-       return 0;
-}
-
-static void release_pebs_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds || !x86_pmu.pebs)
-               return;
-
-       kfree(per_cpu(insn_buffer, cpu));
-       per_cpu(insn_buffer, cpu) = NULL;
-
-       kfree((void *)(unsigned long)ds->pebs_buffer_base);
-       ds->pebs_buffer_base = 0;
-}
-
-static int alloc_bts_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max, thresh;
-       void *buffer;
-
-       if (!x86_pmu.bts)
-               return 0;
-
-       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
-       if (unlikely(!buffer)) {
-               WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
-               return -ENOMEM;
-       }
-
-       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-       thresh = max / 16;
-
-       ds->bts_buffer_base = (u64)(unsigned long)buffer;
-       ds->bts_index = ds->bts_buffer_base;
-       ds->bts_absolute_maximum = ds->bts_buffer_base +
-               max * BTS_RECORD_SIZE;
-       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-               thresh * BTS_RECORD_SIZE;
-
-       return 0;
-}
-
-static void release_bts_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds || !x86_pmu.bts)
-               return;
-
-       kfree((void *)(unsigned long)ds->bts_buffer_base);
-       ds->bts_buffer_base = 0;
-}
-
-static int alloc_ds_buffer(int cpu)
-{
-       int node = cpu_to_node(cpu);
-       struct debug_store *ds;
-
-       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-       if (unlikely(!ds))
-               return -ENOMEM;
-
-       per_cpu(cpu_hw_events, cpu).ds = ds;
-
-       return 0;
-}
-
-static void release_ds_buffer(int cpu)
-{
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
-       per_cpu(cpu_hw_events, cpu).ds = NULL;
-       kfree(ds);
-}
-
-void release_ds_buffers(void)
-{
-       int cpu;
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu)
-               fini_debug_store_on_cpu(cpu);
-
-       for_each_possible_cpu(cpu) {
-               release_pebs_buffer(cpu);
-               release_bts_buffer(cpu);
-               release_ds_buffer(cpu);
-       }
-       put_online_cpus();
-}
-
-void reserve_ds_buffers(void)
-{
-       int bts_err = 0, pebs_err = 0;
-       int cpu;
-
-       x86_pmu.bts_active = 0;
-       x86_pmu.pebs_active = 0;
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       if (!x86_pmu.bts)
-               bts_err = 1;
-
-       if (!x86_pmu.pebs)
-               pebs_err = 1;
-
-       get_online_cpus();
-
-       for_each_possible_cpu(cpu) {
-               if (alloc_ds_buffer(cpu)) {
-                       bts_err = 1;
-                       pebs_err = 1;
-               }
-
-               if (!bts_err && alloc_bts_buffer(cpu))
-                       bts_err = 1;
-
-               if (!pebs_err && alloc_pebs_buffer(cpu))
-                       pebs_err = 1;
-
-               if (bts_err && pebs_err)
-                       break;
-       }
-
-       if (bts_err) {
-               for_each_possible_cpu(cpu)
-                       release_bts_buffer(cpu);
-       }
-
-       if (pebs_err) {
-               for_each_possible_cpu(cpu)
-                       release_pebs_buffer(cpu);
-       }
-
-       if (bts_err && pebs_err) {
-               for_each_possible_cpu(cpu)
-                       release_ds_buffer(cpu);
-       } else {
-               if (x86_pmu.bts && !bts_err)
-                       x86_pmu.bts_active = 1;
-
-               if (x86_pmu.pebs && !pebs_err)
-                       x86_pmu.pebs_active = 1;
-
-               for_each_online_cpu(cpu)
-                       init_debug_store_on_cpu(cpu);
-       }
-
-       put_online_cpus();
-}
-
-/*
- * BTS
- */
-
-struct event_constraint bts_constraint =
-       EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
-
-void intel_pmu_enable_bts(u64 config)
-{
-       unsigned long debugctlmsr;
-
-       debugctlmsr = get_debugctlmsr();
-
-       debugctlmsr |= DEBUGCTLMSR_TR;
-       debugctlmsr |= DEBUGCTLMSR_BTS;
-       if (config & ARCH_PERFMON_EVENTSEL_INT)
-               debugctlmsr |= DEBUGCTLMSR_BTINT;
-
-       if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
-
-       if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
-
-       update_debugctlmsr(debugctlmsr);
-}
-
-void intel_pmu_disable_bts(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       unsigned long debugctlmsr;
-
-       if (!cpuc->ds)
-               return;
-
-       debugctlmsr = get_debugctlmsr();
-
-       debugctlmsr &=
-               ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
-                 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
-
-       update_debugctlmsr(debugctlmsr);
-}
-
-int intel_pmu_drain_bts_buffer(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct bts_record {
-               u64     from;
-               u64     to;
-               u64     flags;
-       };
-       struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
-       struct bts_record *at, *base, *top;
-       struct perf_output_handle handle;
-       struct perf_event_header header;
-       struct perf_sample_data data;
-       unsigned long skip = 0;
-       struct pt_regs regs;
-
-       if (!event)
-               return 0;
-
-       if (!x86_pmu.bts_active)
-               return 0;
-
-       base = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-       top  = (struct bts_record *)(unsigned long)ds->bts_index;
-
-       if (top <= base)
-               return 0;
-
-       memset(&regs, 0, sizeof(regs));
-
-       ds->bts_index = ds->bts_buffer_base;
-
-       perf_sample_data_init(&data, 0, event->hw.last_period);
-
-       /*
-        * BTS leaks kernel addresses in branches across the cpl boundary,
-        * such as traps or system calls, so unless the user is asking for
-        * kernel tracing (and right now it's not possible), we'd need to
-        * filter them out. But first we need to count how many of those we
-        * have in the current batch. This is an extra O(n) pass; however,
-        * it is much faster than the output pass below, especially since
-        * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see
-        * alloc_bts_buffer()).
-        */
-       for (at = base; at < top; at++) {
-               /*
-                * Note that right now *this* BTS code only works if
-                * attr::exclude_kernel is set, but let's keep this extra
-                * check here in case that changes.
-                */
-               if (event->attr.exclude_kernel &&
-                   (kernel_ip(at->from) || kernel_ip(at->to)))
-                       skip++;
-       }
-
-       /*
-        * Prepare a generic sample, i.e. fill in the invariant fields.
-        * We will overwrite the from and to address before we output
-        * the sample.
-        */
-       perf_prepare_sample(&header, &data, event, &regs);
-
-       if (perf_output_begin(&handle, event, header.size *
-                             (top - base - skip)))
-               return 1;
-
-       for (at = base; at < top; at++) {
-               /* Filter out any records that contain kernel addresses. */
-               if (event->attr.exclude_kernel &&
-                   (kernel_ip(at->from) || kernel_ip(at->to)))
-                       continue;
-
-               data.ip         = at->from;
-               data.addr       = at->to;
-
-               perf_output_sample(&handle, &header, &data, event);
-       }
-
-       perf_output_end(&handle);
-
-       /* There's new data available. */
-       event->hw.interrupts++;
-       event->pending_kill = POLL_IN;
-       return 1;
-}
-
-static inline void intel_pmu_drain_pebs_buffer(void)
-{
-       struct pt_regs regs;
-
-       x86_pmu.drain_pebs(&regs);
-}
-
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       if (!sched_in)
-               intel_pmu_drain_pebs_buffer();
-}
-
-/*
- * PEBS
- */
-struct event_constraint intel_core2_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_atom_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_slm_pebs_event_constraints[] = {
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_nehalem_pebs_event_constraints[] = {
-       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INST_RETIRED.ANY */
-       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_westmere_pebs_event_constraints[] = {
-       INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xf),    /* INSTR_RETIRED.* */
-       INTEL_EVENT_CONSTRAINT(0xc2, 0xf),    /* UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xc7, 0xf),    /* SSEX_UOPS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
-       /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_snb_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
-       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_ivb_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
-       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_hsw_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
-       /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x12d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x42d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd2, 0xf),    /* MEM_LOAD_UOPS_L3_HIT_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(0xd3, 0xf),    /* MEM_LOAD_UOPS_L3_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint intel_skl_pebs_event_constraints[] = {
-       INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
-       /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
-       /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
-       INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
-       INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x22d0, 0xf), /* MEM_INST_RETIRED.LOCK_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */
-       INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd1, 0xf),    /* MEM_LOAD_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd2, 0xf),    /* MEM_LOAD_L3_HIT_RETIRED.* */
-       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(0xd3, 0xf),    /* MEM_LOAD_L3_MISS_RETIRED.* */
-       /* Allow all events as PEBS with no flags */
-       INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-       EVENT_CONSTRAINT_END
-};
-
-struct event_constraint *intel_pebs_constraints(struct perf_event *event)
-{
-       struct event_constraint *c;
-
-       if (!event->attr.precise_ip)
-               return NULL;
-
-       if (x86_pmu.pebs_constraints) {
-               for_each_event_constraint(c, x86_pmu.pebs_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code) {
-                               event->hw.flags |= c->flags;
-                               return c;
-                       }
-               }
-       }
-
-       return &emptyconstraint;
-}
-
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
-{
-       return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
-}
-
-void intel_pmu_pebs_enable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       struct debug_store *ds = cpuc->ds;
-       bool first_pebs;
-       u64 threshold;
-
-       hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
-       first_pebs = !pebs_is_enabled(cpuc);
-       cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
-               cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
-               cpuc->pebs_enabled |= 1ULL << 63;
-
-       /*
-        * When the event is constrained enough we can use a larger
-        * threshold and run the event with less frequent PMI.
-        */
-       if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-               threshold = ds->pebs_absolute_maximum -
-                       x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
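-               /*
-                * Note: the threshold computed above leaves room for one
-                * more record per PEBS-capable counter beyond the interrupt
-                * threshold, so records still in flight when the threshold
-                * is crossed can land without overrunning the buffer.
-                */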
-
-               if (first_pebs)
-                       perf_sched_cb_inc(event->ctx->pmu);
-       } else {
-               threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-               /*
-                * If not all events can use the larger buffer,
-                * roll back to a threshold of a single record.
-                */
-               if (!first_pebs &&
-                   (ds->pebs_interrupt_threshold > threshold))
-                       perf_sched_cb_dec(event->ctx->pmu);
-       }
-
-       /* Use auto-reload if possible to save a MSR write in the PMI */
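-       /*
-        * The reset value below is the two's complement of the sample
-        * period (truncated to the counter width), so the counter
-        * overflows again after another 'sample_period' events.
-        */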
-       if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-               ds->pebs_event_reset[hwc->idx] =
-                       (u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
-       }
-
-       if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-               ds->pebs_interrupt_threshold = threshold;
-}
-
-void intel_pmu_pebs_disable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct hw_perf_event *hwc = &event->hw;
-       struct debug_store *ds = cpuc->ds;
-       bool large_pebs = ds->pebs_interrupt_threshold >
-               ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-       if (large_pebs)
-               intel_pmu_drain_pebs_buffer();
-
-       cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
-
-       if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
-               cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
-       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
-               cpuc->pebs_enabled &= ~(1ULL << 63);
-
-       if (large_pebs && !pebs_is_enabled(cpuc))
-               perf_sched_cb_dec(event->ctx->pmu);
-
-       if (cpuc->enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
-       hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-}
-
-void intel_pmu_pebs_enable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->pebs_enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-}
-
-void intel_pmu_pebs_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->pebs_enabled)
-               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
-}
-
-static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       unsigned long from = cpuc->lbr_entries[0].from;
-       unsigned long old_to, to = cpuc->lbr_entries[0].to;
-       unsigned long ip = regs->ip;
-       int is_64bit = 0;
-       void *kaddr;
-       int size;
-
-       /*
-        * We don't need to fix up the IP if the PEBS assist is fault-like
-        */
-       if (!x86_pmu.intel_cap.pebs_trap)
-               return 1;
-
-       /*
-        * No LBR entry, no basic block, no rewinding
-        */
-       if (!cpuc->lbr_stack.nr || !from || !to)
-               return 0;
-
-       /*
-        * Basic blocks should never cross user/kernel boundaries
-        */
-       if (kernel_ip(ip) != kernel_ip(to))
-               return 0;
-
-       /*
-        * unsigned math, either ip is before the start (impossible) or
-        * the basic block is larger than 1 page (sanity)
-        */
-       if ((ip - to) > PEBS_FIXUP_SIZE)
-               return 0;
-
-       /*
-        * We sampled a branch insn, rewind using the LBR stack
-        */
-       if (ip == to) {
-               set_linear_ip(regs, from);
-               return 1;
-       }
-
-       size = ip - to;
-       if (!kernel_ip(ip)) {
-               int bytes;
-               u8 *buf = this_cpu_read(insn_buffer);
-
-               /* 'size' must fit our buffer, see above */
-               bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-               if (bytes != 0)
-                       return 0;
-
-               kaddr = buf;
-       } else {
-               kaddr = (void *)to;
-       }
-
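-       /*
-        * Decode forward from the branch target, one instruction at a
-        * time, until we reach the sampled IP; the instruction just before
-        * it (old_to) is the one that triggered the PEBS record.
-        */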
-       do {
-               struct insn insn;
-
-               old_to = to;
-
-#ifdef CONFIG_X86_64
-               is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
-#endif
-               insn_init(&insn, kaddr, size, is_64bit);
-               insn_get_length(&insn);
-               /*
-                * Make sure there was not a problem decoding the
-                * instruction and getting the length.  This is
-                * doubly important because we have an infinite
-                * loop if insn.length=0.
-                */
-               if (!insn.length)
-                       break;
-
-               to += insn.length;
-               kaddr += insn.length;
-               size -= insn.length;
-       } while (to < ip);
-
-       if (to == ip) {
-               set_linear_ip(regs, old_to);
-               return 1;
-       }
-
-       /*
-        * Even though we decoded the basic block, the instruction stream
-        * never matched the given IP, either the TO or the IP got corrupted.
-        */
-       return 0;
-}
-
-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
-{
-       if (pebs->tsx_tuning) {
-               union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
-               return tsx.cycles_last_block;
-       }
-       return 0;
-}
-
-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
-{
-       u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
-
-       /* For RTM XABORTs also log the abort code from AX */
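-       /*
-        * After an RTM abort, AX bit 0 is set when the abort was caused by
-        * an XABORT instruction and bits 31:24 hold the imm8 operand that
-        * was passed to XABORT.
-        */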
-       if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
-               txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
-       return txn;
-}
-
-static void setup_pebs_sample_data(struct perf_event *event,
-                                  struct pt_regs *iregs, void *__pebs,
-                                  struct perf_sample_data *data,
-                                  struct pt_regs *regs)
-{
-#define PERF_X86_EVENT_PEBS_HSW_PREC \
-               (PERF_X86_EVENT_PEBS_ST_HSW | \
-                PERF_X86_EVENT_PEBS_LD_HSW | \
-                PERF_X86_EVENT_PEBS_NA_HSW)
-       /*
-        * We cast to the biggest pebs_record but are careful not to
-        * unconditionally access the 'extra' entries.
-        */
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct pebs_record_skl *pebs = __pebs;
-       u64 sample_type;
-       int fll, fst, dsrc;
-       int fl = event->hw.flags;
-
-       if (pebs == NULL)
-               return;
-
-       sample_type = event->attr.sample_type;
-       dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
-       fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
-       fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
-
-       perf_sample_data_init(data, 0, event->hw.last_period);
-
-       data->period = event->hw.last_period;
-
-       /*
-        * Use latency for weight (only avail with PEBS-LL)
-        */
-       if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-               data->weight = pebs->lat;
-
-       /*
-        * data.data_src encodes the data source
-        */
-       if (dsrc) {
-               u64 val = PERF_MEM_NA;
-               if (fll)
-                       val = load_latency_data(pebs->dse);
-               else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
-                       val = precise_datala_hsw(event, pebs->dse);
-               else if (fst)
-                       val = precise_store_data(pebs->dse);
-               data->data_src.val = val;
-       }
-
-       /*
-        * We use the interrupt regs as a base because the PEBS record
-        * does not contain a full register set; specifically, it seems to
-        * lack segment descriptors, which get used by things like
-        * user_mode().
-        *
-        * In the simple case we fix up only the IP and the BP/SP regs, so
-        * that PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN function properly.
-        * A possible PERF_SAMPLE_REGS will have to transfer all regs.
-        */
-       *regs = *iregs;
-       regs->flags = pebs->flags;
-       set_linear_ip(regs, pebs->ip);
-       regs->bp = pebs->bp;
-       regs->sp = pebs->sp;
-
-       if (sample_type & PERF_SAMPLE_REGS_INTR) {
-               regs->ax = pebs->ax;
-               regs->bx = pebs->bx;
-               regs->cx = pebs->cx;
-               regs->dx = pebs->dx;
-               regs->si = pebs->si;
-               regs->di = pebs->di;
-               regs->bp = pebs->bp;
-               regs->sp = pebs->sp;
-
-               regs->flags = pebs->flags;
-#ifndef CONFIG_X86_32
-               regs->r8 = pebs->r8;
-               regs->r9 = pebs->r9;
-               regs->r10 = pebs->r10;
-               regs->r11 = pebs->r11;
-               regs->r12 = pebs->r12;
-               regs->r13 = pebs->r13;
-               regs->r14 = pebs->r14;
-               regs->r15 = pebs->r15;
-#endif
-       }
-
-       if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-               regs->ip = pebs->real_ip;
-               regs->flags |= PERF_EFLAGS_EXACT;
-       } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
-               regs->flags |= PERF_EFLAGS_EXACT;
-       else
-               regs->flags &= ~PERF_EFLAGS_EXACT;
-
-       if ((sample_type & PERF_SAMPLE_ADDR) &&
-           x86_pmu.intel_cap.pebs_format >= 1)
-               data->addr = pebs->dla;
-
-       if (x86_pmu.intel_cap.pebs_format >= 2) {
-               /* Only set the TSX weight when no memory weight. */
-               if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-                       data->weight = intel_hsw_weight(pebs);
-
-               if (sample_type & PERF_SAMPLE_TRANSACTION)
-                       data->txn = intel_hsw_transaction(pebs);
-       }
-
-       /*
-        * PEBS format v3 supplies an accurate TSC time stamp, so we use
-        * that as the sample time.
-        *
-        * We can only do this for the default trace clock.
-        */
-       if (x86_pmu.intel_cap.pebs_format >= 3 &&
-               event->attr.use_clockid == 0)
-               data->time = native_sched_clock_from_tsc(pebs->tsc);
-
-       if (has_branch_stack(event))
-               data->br_stack = &cpuc->lbr_stack;
-}
-
-static inline void *
-get_next_pebs_record_by_bit(void *base, void *top, int bit)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       void *at;
-       u64 pebs_status;
-
-       /*
-        * fmt0 does not have a status bitfield (does not use
-        * the pebs_record_nhm format)
-        */
-       if (x86_pmu.intel_cap.pebs_format < 1)
-               return base;
-
-       if (base == NULL)
-               return NULL;
-
-       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
-               struct pebs_record_nhm *p = at;
-
-               if (test_bit(bit, (unsigned long *)&p->status)) {
-                       /* PEBS v3 has accurate status bits */
-                       if (x86_pmu.intel_cap.pebs_format >= 3)
-                               return at;
-
-                       if (p->status == (1 << bit))
-                               return at;
-
-                       /* clear non-PEBS bit and re-check */
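-                       /*
-                        * Only the low MAX_PEBS_EVENTS bits of pebs_enabled
-                        * are counter enable bits; the upper bits carry the
-                        * load-latency and precise-store enables and must not
-                        * be compared against the record's status bits.
-                        */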
-                       pebs_status = p->status & cpuc->pebs_enabled;
-                       pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-                       if (pebs_status == (1 << bit))
-                               return at;
-               }
-       }
-       return NULL;
-}
-
-static void __intel_pmu_pebs_event(struct perf_event *event,
-                                  struct pt_regs *iregs,
-                                  void *base, void *top,
-                                  int bit, int count)
-{
-       struct perf_sample_data data;
-       struct pt_regs regs;
-       void *at = get_next_pebs_record_by_bit(base, top, bit);
-
-       if (!intel_pmu_save_and_restart(event) &&
-           !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
-               return;
-
-       while (count > 1) {
-               setup_pebs_sample_data(event, iregs, at, &data, &regs);
-               perf_event_output(event, &data, &regs);
-               at += x86_pmu.pebs_record_size;
-               at = get_next_pebs_record_by_bit(at, top, bit);
-               count--;
-       }
-
-       setup_pebs_sample_data(event, iregs, at, &data, &regs);
-
-       /*
-        * All but the last record were output above; the last one goes
-        * through perf_event_overflow() so that the overflow handler runs.
-        */
-       if (perf_event_overflow(event, &data, &regs)) {
-               x86_pmu_stop(event, 0);
-               return;
-       }
-
-}
-
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
-       struct pebs_record_core *at, *top;
-       int n;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
-
-       /*
-        * Whatever else happens, drain the thing
-        */
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       if (!test_bit(0, cpuc->active_mask))
-               return;
-
-       WARN_ON_ONCE(!event);
-
-       if (!event->attr.precise_ip)
-               return;
-
-       n = top - at;
-       if (n <= 0)
-               return;
-
-       __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
-}
-
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct perf_event *event;
-       void *base, *at, *top;
-       short counts[MAX_PEBS_EVENTS] = {};
-       short error[MAX_PEBS_EVENTS] = {};
-       int bit, i;
-
-       if (!x86_pmu.pebs_active)
-               return;
-
-       base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-       ds->pebs_index = ds->pebs_buffer_base;
-
-       if (unlikely(base >= top))
-               return;
-
-       for (at = base; at < top; at += x86_pmu.pebs_record_size) {
-               struct pebs_record_nhm *p = at;
-               u64 pebs_status;
-
-               /* PEBS v3 has accurate status bits */
-               if (x86_pmu.intel_cap.pebs_format >= 3) {
-                       for_each_set_bit(bit, (unsigned long *)&p->status,
-                                        MAX_PEBS_EVENTS)
-                               counts[bit]++;
-
-                       continue;
-               }
-
-               pebs_status = p->status & cpuc->pebs_enabled;
-               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
-               /*
-                * On some CPUs the PEBS status can be zero when PEBS is
-                * racing with clearing of GLOBAL_STATUS.
-                *
-                * Normally we would drop that record, but in the
-                * case when there is only a single active PEBS event
-                * we can assume it's for that event.
-                */
-               if (!pebs_status && cpuc->pebs_enabled &&
-                       !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
-                       pebs_status = cpuc->pebs_enabled;
-
-               bit = find_first_bit((unsigned long *)&pebs_status,
-                                       x86_pmu.max_pebs_events);
-               if (bit >= x86_pmu.max_pebs_events)
-                       continue;
-
-               /*
-                * The PEBS hardware does not deal well with the situation
-                * when events happen close to each other and multiple
-                * status bits are set. But this should happen rarely.
-                *
-                * If these events include one PEBS and multiple non-PEBS
-                * events, it doesn't impact PEBS record. The record will
-                * be handled normally. (slow path)
-                *
-                * If these events include two or more PEBS events, the
-                * records for the events can be collapsed into a single
-                * one, and it's not possible to reconstruct all events
-                * that caused the PEBS record. It's called collision.
-                * If collision happened, the record will be dropped.
-                */
-               if (p->status != (1ULL << bit)) {
-                       for_each_set_bit(i, (unsigned long *)&pebs_status,
-                                        x86_pmu.max_pebs_events)
-                               error[i]++;
-                       continue;
-               }
-
-               counts[bit]++;
-       }
-
-       for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
-               if ((counts[bit] == 0) && (error[bit] == 0))
-                       continue;
-
-               event = cpuc->events[bit];
-               WARN_ON_ONCE(!event);
-               WARN_ON_ONCE(!event->attr.precise_ip);
-
-               /* log dropped samples number */
-               if (error[bit])
-                       perf_log_lost_samples(event, error[bit]);
-
-               if (counts[bit]) {
-                       __intel_pmu_pebs_event(event, iregs, base,
-                                              top, bit, counts[bit]);
-               }
-       }
-}
-
-/*
- * BTS, PEBS probe and setup
- */
-
-void __init intel_ds_init(void)
-{
-       /*
-        * No support for 32-bit formats
-        */
-       if (!boot_cpu_has(X86_FEATURE_DTES64))
-               return;
-
-       x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
-       x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
-       if (x86_pmu.pebs) {
-               char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
-               int format = x86_pmu.intel_cap.pebs_format;
-
-               switch (format) {
-               case 0:
-                       printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
-                       break;
-
-               case 1:
-                       printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       break;
-
-               case 2:
-                       pr_cont("PEBS fmt2%c, ", pebs_type);
-                       x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       break;
-
-               case 3:
-                       pr_cont("PEBS fmt3%c, ", pebs_type);
-                       x86_pmu.pebs_record_size =
-                                               sizeof(struct pebs_record_skl);
-                       x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-                       x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
-                       break;
-
-               default:
-                       printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
-                       x86_pmu.pebs = 0;
-               }
-       }
-}
-
-void perf_restore_debug_store(void)
-{
-       struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
-
-       if (!x86_pmu.bts && !x86_pmu.pebs)
-               return;
-
-       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
-}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
deleted file mode 100644 (file)
index 653f88d..0000000
+++ /dev/null
@@ -1,1062 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/perf_event.h>
-#include <asm/msr.h>
-#include <asm/insn.h>
-
-#include "perf_event.h"
-
-enum {
-       LBR_FORMAT_32           = 0x00,
-       LBR_FORMAT_LIP          = 0x01,
-       LBR_FORMAT_EIP          = 0x02,
-       LBR_FORMAT_EIP_FLAGS    = 0x03,
-       LBR_FORMAT_EIP_FLAGS2   = 0x04,
-       LBR_FORMAT_INFO         = 0x05,
-       LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
-};
-
-static enum {
-       LBR_EIP_FLAGS           = 1,
-       LBR_TSX                 = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-       [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
-       [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
-/*
- * Intel LBR_SELECT bits
- * Intel Vol3a, April 2011, Section 16.7 Table 16-10
- *
- * Hardware branch filter (not available on all CPUs)
- */
-#define LBR_KERNEL_BIT         0 /* do not capture at ring0 */
-#define LBR_USER_BIT           1 /* do not capture at ring > 0 */
-#define LBR_JCC_BIT            2 /* do not capture conditional branches */
-#define LBR_REL_CALL_BIT       3 /* do not capture relative calls */
-#define LBR_IND_CALL_BIT       4 /* do not capture indirect calls */
-#define LBR_RETURN_BIT         5 /* do not capture near returns */
-#define LBR_IND_JMP_BIT                6 /* do not capture indirect jumps */
-#define LBR_REL_JMP_BIT                7 /* do not capture relative jumps */
-#define LBR_FAR_BIT            8 /* do not capture far branches */
-#define LBR_CALL_STACK_BIT     9 /* enable call stack */
-
-/*
- * The following bit only exists in Linux; we mask it out before writing it
- * to the actual MSR. But it helps the perf constraint code to understand
- * that this is a separate configuration.
- */
-#define LBR_NO_INFO_BIT               63 /* don't read LBR_INFO. */
-
-#define LBR_KERNEL     (1 << LBR_KERNEL_BIT)
-#define LBR_USER       (1 << LBR_USER_BIT)
-#define LBR_JCC                (1 << LBR_JCC_BIT)
-#define LBR_REL_CALL   (1 << LBR_REL_CALL_BIT)
-#define LBR_IND_CALL   (1 << LBR_IND_CALL_BIT)
-#define LBR_RETURN     (1 << LBR_RETURN_BIT)
-#define LBR_REL_JMP    (1 << LBR_REL_JMP_BIT)
-#define LBR_IND_JMP    (1 << LBR_IND_JMP_BIT)
-#define LBR_FAR                (1 << LBR_FAR_BIT)
-#define LBR_CALL_STACK (1 << LBR_CALL_STACK_BIT)
-#define LBR_NO_INFO    (1ULL << LBR_NO_INFO_BIT)
-
-#define LBR_PLM (LBR_KERNEL | LBR_USER)
-
-#define LBR_SEL_MASK   0x1ff   /* valid bits in LBR_SELECT */
-#define LBR_NOT_SUPP   -1      /* LBR filter not supported */
-#define LBR_IGN                0       /* ignored */
-
-#define LBR_ANY                 \
-       (LBR_JCC        |\
-        LBR_REL_CALL   |\
-        LBR_IND_CALL   |\
-        LBR_RETURN     |\
-        LBR_REL_JMP    |\
-        LBR_IND_JMP    |\
-        LBR_FAR)
-
-#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
-#define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
-#define LBR_FROM_FLAG_ABORT    (1ULL << 61)
-
-/*
- * x86 control flow change classification
- * x86 control flow changes include branches, interrupts, traps and faults
- */
-enum {
-       X86_BR_NONE             = 0,      /* unknown */
-
-       X86_BR_USER             = 1 << 0, /* branch target is user */
-       X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
-
-       X86_BR_CALL             = 1 << 2, /* call */
-       X86_BR_RET              = 1 << 3, /* return */
-       X86_BR_SYSCALL          = 1 << 4, /* syscall */
-       X86_BR_SYSRET           = 1 << 5, /* syscall return */
-       X86_BR_INT              = 1 << 6, /* sw interrupt */
-       X86_BR_IRET             = 1 << 7, /* return from interrupt */
-       X86_BR_JCC              = 1 << 8, /* conditional */
-       X86_BR_JMP              = 1 << 9, /* jump */
-       X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
-       X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
-       X86_BR_ABORT            = 1 << 12,/* transaction abort */
-       X86_BR_IN_TX            = 1 << 13,/* in transaction */
-       X86_BR_NO_TX            = 1 << 14,/* not in transaction */
-       X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
-       X86_BR_CALL_STACK       = 1 << 16,/* call stack */
-       X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY       \
-       (X86_BR_CALL    |\
-        X86_BR_RET     |\
-        X86_BR_SYSCALL |\
-        X86_BR_SYSRET  |\
-        X86_BR_INT     |\
-        X86_BR_IRET    |\
-        X86_BR_JCC     |\
-        X86_BR_JMP      |\
-        X86_BR_IRQ      |\
-        X86_BR_ABORT    |\
-        X86_BR_IND_CALL |\
-        X86_BR_IND_JMP  |\
-        X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL                 \
-       (X86_BR_CALL            |\
-        X86_BR_IND_CALL        |\
-        X86_BR_ZERO_CALL       |\
-        X86_BR_SYSCALL         |\
-        X86_BR_IRQ             |\
-        X86_BR_INT)
-
-static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
-
-/*
- * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
- * otherwise it becomes nearly impossible to get a reliable stack.
- */
-
-static void __intel_pmu_lbr_enable(bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       u64 debugctl, lbr_select = 0, orig_debugctl;
-
-       /*
-        * No need to unfreeze manually, as v4 can do that as part
-        * of the GLOBAL_STATUS ack.
-        */
-       if (pmi && x86_pmu.version >= 4)
-               return;
-
-       /*
-        * No need to reprogram LBR_SELECT in a PMI, as it
-        * did not change.
-        */
-       if (cpuc->lbr_sel)
-               lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
-       if (!pmi && cpuc->lbr_sel)
-               wrmsrl(MSR_LBR_SELECT, lbr_select);
-
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-       orig_debugctl = debugctl;
-       debugctl |= DEBUGCTLMSR_LBR;
-       /*
-        * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
-        * If FREEZE_LBRS_ON_PMI is set, a PMI near call/return instructions
-        * may cause superfluous increases/decreases of LBR_TOS.
-        */
-       if (!(lbr_select & LBR_CALL_STACK))
-               debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
-       if (orig_debugctl != debugctl)
-               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void __intel_pmu_lbr_disable(void)
-{
-       u64 debugctl;
-
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-       debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
-static void intel_pmu_lbr_reset_32(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.lbr_nr; i++)
-               wrmsrl(x86_pmu.lbr_from + i, 0);
-}
-
-static void intel_pmu_lbr_reset_64(void)
-{
-       int i;
-
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               wrmsrl(x86_pmu.lbr_from + i, 0);
-               wrmsrl(x86_pmu.lbr_to   + i, 0);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + i, 0);
-       }
-}
-
-void intel_pmu_lbr_reset(void)
-{
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_reset_32();
-       else
-               intel_pmu_lbr_reset_64();
-}
-
-/*
- * TOS = most recently recorded branch
- */
-static inline u64 intel_pmu_lbr_tos(void)
-{
-       u64 tos;
-
-       rdmsrl(x86_pmu.lbr_tos, tos);
-       return tos;
-}
-
-enum {
-       LBR_NONE,
-       LBR_VALID,
-};
-
-static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
-{
-       int i;
-       unsigned lbr_idx, mask;
-       u64 tos;
-
-       if (task_ctx->lbr_callstack_users == 0 ||
-           task_ctx->lbr_stack_state == LBR_NONE) {
-               intel_pmu_lbr_reset();
-               return;
-       }
-
-       mask = x86_pmu.lbr_nr - 1;
-       tos = task_ctx->tos;
-       for (i = 0; i < tos; i++) {
-               lbr_idx = (tos - i) & mask;
-               wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-               wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
-       }
-       wrmsrl(x86_pmu.lbr_tos, tos);
-       task_ctx->lbr_stack_state = LBR_NONE;
-}
-
-static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
-{
-       int i;
-       unsigned lbr_idx, mask;
-       u64 tos;
-
-       if (task_ctx->lbr_callstack_users == 0) {
-               task_ctx->lbr_stack_state = LBR_NONE;
-               return;
-       }
-
-       mask = x86_pmu.lbr_nr - 1;
-       tos = intel_pmu_lbr_tos();
-       for (i = 0; i < tos; i++) {
-               lbr_idx = (tos - i) & mask;
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-               rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
-               if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
-       }
-       task_ctx->tos = tos;
-       task_ctx->lbr_stack_state = LBR_VALID;
-}
-
-void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       /*
-        * If the LBR callstack feature is enabled and the stack was saved
-        * when the task was scheduled out, restore the stack. Otherwise flush
-        * the LBR stack.
-        */
-       task_ctx = ctx ? ctx->task_ctx_data : NULL;
-       if (task_ctx) {
-               if (sched_in) {
-                       __intel_pmu_lbr_restore(task_ctx);
-                       cpuc->lbr_context = ctx;
-               } else {
-                       __intel_pmu_lbr_save(task_ctx);
-               }
-               return;
-       }
-
-       /*
-        * When sampling the branch stack in system-wide mode, it may be
-        * necessary to flush the stack on context switch. This happens
-        * when the branch stack does not tag its entries with the pid
-        * of the current task. Otherwise it becomes impossible to
-        * associate a branch entry with a task. This ambiguity is more
-        * likely to appear when the branch stack supports priv level
-        * filtering and the user sets it to monitor only at the user
-        * level (which could be a useful measurement in system-wide
-        * mode). In that case, the risk is high of having a branch
-        * stack with branches from multiple tasks.
-        */
-       if (sched_in) {
-               intel_pmu_lbr_reset();
-               cpuc->lbr_context = ctx;
-       }
-}
-
-static inline bool branch_user_callstack(unsigned br_sel)
-{
-       return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
-}
-
-void intel_pmu_lbr_enable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       /*
-        * Reset the LBR stack if we changed task context to
-        * avoid data leaks.
-        */
-       if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-               intel_pmu_lbr_reset();
-               cpuc->lbr_context = event->ctx;
-       }
-       cpuc->br_sel = event->hw.branch_reg.reg;
-
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users++;
-       }
-
-       cpuc->lbr_users++;
-       perf_sched_cb_inc(event->ctx->pmu);
-}
-
-void intel_pmu_lbr_disable(struct perf_event *event)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       struct x86_perf_task_context *task_ctx;
-
-       if (!x86_pmu.lbr_nr)
-               return;
-
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
-               task_ctx = event->ctx->task_ctx_data;
-               task_ctx->lbr_callstack_users--;
-       }
-
-       cpuc->lbr_users--;
-       WARN_ON_ONCE(cpuc->lbr_users < 0);
-       perf_sched_cb_dec(event->ctx->pmu);
-
-       if (cpuc->enabled && !cpuc->lbr_users) {
-               __intel_pmu_lbr_disable();
-               /* avoid stale pointer */
-               cpuc->lbr_context = NULL;
-       }
-}
-
-void intel_pmu_lbr_enable_all(bool pmi)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->lbr_users)
-               __intel_pmu_lbr_enable(pmi);
-}
-
-void intel_pmu_lbr_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (cpuc->lbr_users)
-               __intel_pmu_lbr_disable();
-}
-
-static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
-{
-       unsigned long mask = x86_pmu.lbr_nr - 1;
-       u64 tos = intel_pmu_lbr_tos();
-       int i;
-
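-       /*
-        * The LBR MSRs form a ring of lbr_nr entries (a power of two on
-        * the CPUs handled here); indexing with (tos - i) & mask walks the
-        * ring from the most recent entry backwards.
-        */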
-       for (i = 0; i < x86_pmu.lbr_nr; i++) {
-               unsigned long lbr_idx = (tos - i) & mask;
-               union {
-                       struct {
-                               u32 from;
-                               u32 to;
-                       };
-                       u64     lbr;
-               } msr_lastbranch;
-
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
-
-               cpuc->lbr_entries[i].from       = msr_lastbranch.from;
-               cpuc->lbr_entries[i].to         = msr_lastbranch.to;
-               cpuc->lbr_entries[i].mispred    = 0;
-               cpuc->lbr_entries[i].predicted  = 0;
-               cpuc->lbr_entries[i].reserved   = 0;
-       }
-       cpuc->lbr_stack.nr = i;
-}
-
-/*
- * Due to lack of segmentation in Linux the effective address (offset)
- * is the same as the linear address, allowing us to merge the LIP and EIP
- * LBR formats.
- */
-static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
-{
-       bool need_info = false;
-       unsigned long mask = x86_pmu.lbr_nr - 1;
-       int lbr_format = x86_pmu.intel_cap.lbr_format;
-       u64 tos = intel_pmu_lbr_tos();
-       int i;
-       int out = 0;
-       int num = x86_pmu.lbr_nr;
-
-       if (cpuc->lbr_sel) {
-               need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
-               if (cpuc->lbr_sel->config & LBR_CALL_STACK)
-                       num = tos;
-       }
-
-       for (i = 0; i < num; i++) {
-               unsigned long lbr_idx = (tos - i) & mask;
-               u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-               int skip = 0;
-               u16 cycles = 0;
-               int lbr_flags = lbr_desc[lbr_format];
-
-               rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
-               rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
-
-               if (lbr_format == LBR_FORMAT_INFO && need_info) {
-                       u64 info;
-
-                       rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
-                       mis = !!(info & LBR_INFO_MISPRED);
-                       pred = !mis;
-                       in_tx = !!(info & LBR_INFO_IN_TX);
-                       abort = !!(info & LBR_INFO_ABORT);
-                       cycles = (info & LBR_INFO_CYCLES);
-               }
-               if (lbr_flags & LBR_EIP_FLAGS) {
-                       mis = !!(from & LBR_FROM_FLAG_MISPRED);
-                       pred = !mis;
-                       skip = 1;
-               }
-               if (lbr_flags & LBR_TSX) {
-                       in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-                       abort = !!(from & LBR_FROM_FLAG_ABORT);
-                       skip = 3;
-               }
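-               /*
-                * Shifting left and then arithmetically right by 'skip'
-                * strips the MISPRED/IN_TX/ABORT flag bits from the top of
-                * the FROM address and sign-extends the remaining canonical
-                * address.
-                */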
-               from = (u64)((((s64)from) << skip) >> skip);
-
-               /*
-                * Some CPUs report duplicated abort records,
-                * with the second entry not having an abort bit set.
-                * Skip them here. This loop runs backwards,
-                * so we need to undo the previous record.
-                * If the abort just happened outside the window,
-                * the extra entry cannot be removed.
-                */
-               if (abort && x86_pmu.lbr_double_abort && out > 0)
-                       out--;
-
-               cpuc->lbr_entries[out].from      = from;
-               cpuc->lbr_entries[out].to        = to;
-               cpuc->lbr_entries[out].mispred   = mis;
-               cpuc->lbr_entries[out].predicted = pred;
-               cpuc->lbr_entries[out].in_tx     = in_tx;
-               cpuc->lbr_entries[out].abort     = abort;
-               cpuc->lbr_entries[out].cycles    = cycles;
-               cpuc->lbr_entries[out].reserved  = 0;
-               out++;
-       }
-       cpuc->lbr_stack.nr = out;
-}
-
-void intel_pmu_lbr_read(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       if (!cpuc->lbr_users)
-               return;
-
-       if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
-               intel_pmu_lbr_read_32(cpuc);
-       else
-               intel_pmu_lbr_read_64(cpuc);
-
-       intel_pmu_lbr_filter(cpuc);
-}
-
-/*
- * SW filter is used:
- * - in case there is no HW filter
- * - in case the HW filter has errata or limitations
- */
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
-{
-       u64 br_type = event->attr.branch_sample_type;
-       int mask = 0;
-
-       if (br_type & PERF_SAMPLE_BRANCH_USER)
-               mask |= X86_BR_USER;
-
-       if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
-               mask |= X86_BR_KERNEL;
-
-       /* we ignore BRANCH_HV here */
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY)
-               mask |= X86_BR_ANY;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
-               mask |= X86_BR_ANY_CALL;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
-               mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
-
-       if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
-               mask |= X86_BR_IND_CALL;
-
-       if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
-               mask |= X86_BR_ABORT;
-
-       if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
-               mask |= X86_BR_IN_TX;
-
-       if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
-               mask |= X86_BR_NO_TX;
-
-       if (br_type & PERF_SAMPLE_BRANCH_COND)
-               mask |= X86_BR_JCC;
-
-       if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
-               if (!x86_pmu_has_lbr_callstack())
-                       return -EOPNOTSUPP;
-               if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
-                       return -EINVAL;
-               mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
-                       X86_BR_CALL_STACK;
-       }
-
-       if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
-               mask |= X86_BR_IND_JMP;
-
-       if (br_type & PERF_SAMPLE_BRANCH_CALL)
-               mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
-       /*
-        * stash actual user request into reg, it may
-        * be used by fixup code for some CPU
-        */
-       event->hw.branch_reg.reg = mask;
-       return 0;
-}
-
-/*
- * Set up the HW LBR filter.
- * Used only when available; it may not be enough to disambiguate
- * all branches and may need the help of the SW filter.
- */
-static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg;
-       u64 br_type = event->attr.branch_sample_type;
-       u64 mask = 0, v;
-       int i;
-
-       for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
-               if (!(br_type & (1ULL << i)))
-                       continue;
-
-               v = x86_pmu.lbr_sel_map[i];
-               if (v == LBR_NOT_SUPP)
-                       return -EOPNOTSUPP;
-
-               if (v != LBR_IGN)
-                       mask |= v;
-       }
-
-       reg = &event->hw.branch_reg;
-       reg->idx = EXTRA_REG_LBR;
-
-       /*
-        * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
-        * in suppress mode. So LBR_SELECT should be set to
-        * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
-        */
-       reg->config = mask ^ x86_pmu.lbr_sel_mask;
-
-       if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
-           (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-           (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
-               reg->config |= LBR_NO_INFO;
-
-       return 0;
-}
-
-int intel_pmu_setup_lbr_filter(struct perf_event *event)
-{
-       int ret = 0;
-
-       /*
-        * no LBR on this PMU
-        */
-       if (!x86_pmu.lbr_nr)
-               return -EOPNOTSUPP;
-
-       /*
-        * setup SW LBR filter
-        */
-       ret = intel_pmu_setup_sw_lbr_filter(event);
-       if (ret)
-               return ret;
-
-       /*
-        * setup HW LBR filter, if any
-        */
-       if (x86_pmu.lbr_sel_map)
-               ret = intel_pmu_setup_hw_lbr_filter(event);
-
-       return ret;
-}
-
-/*
- * return the type of control flow change at address "from".
- * The instruction is not necessarily a branch (in case of an interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
-       struct insn insn;
-       void *addr;
-       int bytes_read, bytes_left;
-       int ret = X86_BR_NONE;
-       int ext, to_plm, from_plm;
-       u8 buf[MAX_INSN_SIZE];
-       int is64 = 0;
-
-       to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
-       from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
-       /*
-        * may be zero if the LBR did not fill up after a reset by the time
-        * we get a PMU interrupt
-        */
-       if (from == 0 || to == 0)
-               return X86_BR_NONE;
-
-       if (abort)
-               return X86_BR_ABORT | to_plm;
-
-       if (from_plm == X86_BR_USER) {
-               /*
-                * can happen if measuring at the user level only
-                * and we interrupt in a kernel thread, e.g., idle.
-                */
-               if (!current->mm)
-                       return X86_BR_NONE;
-
-               /* may fail if text not present */
-               bytes_left = copy_from_user_nmi(buf, (void __user *)from,
-                                               MAX_INSN_SIZE);
-               bytes_read = MAX_INSN_SIZE - bytes_left;
-               if (!bytes_read)
-                       return X86_BR_NONE;
-
-               addr = buf;
-       } else {
-               /*
-                * The LBR logs any address in the IP, even if the IP just
-                * faulted. This means userspace can control the from address.
-                * Ensure we don't blindly read any address by validating it is
-                * a known text address.
-                */
-               if (kernel_text_address(from)) {
-                       addr = (void *)from;
-                       /*
-                        * Assume we can get the maximum possible size
-                        * when grabbing kernel data.  This is not
-                        * _strictly_ true since we could possibly be
-                        * executing up next to a memory hole, but
-                        * it is very unlikely to be a problem.
-                        */
-                       bytes_read = MAX_INSN_SIZE;
-               } else {
-                       return X86_BR_NONE;
-               }
-       }
-
-       /*
-        * the decoder needs to know the ABI, especially
-        * on 64-bit systems running 32-bit apps
-        */
-#ifdef CONFIG_X86_64
-       is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
-#endif
-       insn_init(&insn, addr, bytes_read, is64);
-       insn_get_opcode(&insn);
-       if (!insn.opcode.got)
-               return X86_BR_ABORT;
-
-       switch (insn.opcode.bytes[0]) {
-       case 0xf:
-               switch (insn.opcode.bytes[1]) {
-               case 0x05: /* syscall */
-               case 0x34: /* sysenter */
-                       ret = X86_BR_SYSCALL;
-                       break;
-               case 0x07: /* sysret */
-               case 0x35: /* sysexit */
-                       ret = X86_BR_SYSRET;
-                       break;
-               case 0x80 ... 0x8f: /* conditional */
-                       ret = X86_BR_JCC;
-                       break;
-               default:
-                       ret = X86_BR_NONE;
-               }
-               break;
-       case 0x70 ... 0x7f: /* conditional */
-               ret = X86_BR_JCC;
-               break;
-       case 0xc2: /* near ret */
-       case 0xc3: /* near ret */
-       case 0xca: /* far ret */
-       case 0xcb: /* far ret */
-               ret = X86_BR_RET;
-               break;
-       case 0xcf: /* iret */
-               ret = X86_BR_IRET;
-               break;
-       case 0xcc ... 0xce: /* int */
-               ret = X86_BR_INT;
-               break;
-       case 0xe8: /* call near rel */
-               insn_get_immediate(&insn);
-               if (insn.immediate1.value == 0) {
-                       /* zero length call */
-                       ret = X86_BR_ZERO_CALL;
-                       break;
-               }
-       case 0x9a: /* call far absolute */
-               ret = X86_BR_CALL;
-               break;
-       case 0xe0 ... 0xe3: /* loop jmp */
-               ret = X86_BR_JCC;
-               break;
-       case 0xe9 ... 0xeb: /* jmp */
-               ret = X86_BR_JMP;
-               break;
-       case 0xff: /* call near absolute, call far absolute ind */
-               insn_get_modrm(&insn);
-               ext = (insn.modrm.bytes[0] >> 3) & 0x7;
-               switch (ext) {
-               case 2: /* near ind call */
-               case 3: /* far ind call */
-                       ret = X86_BR_IND_CALL;
-                       break;
-               case 4:
-               case 5:
-                       ret = X86_BR_IND_JMP;
-                       break;
-               }
-               break;
-       default:
-               ret = X86_BR_NONE;
-       }
-       /*
-        * interrupts, traps, faults (and thus ring transitions) may
-        * occur on any instruction. Thus, to classify them correctly,
-        * we need to first look at the from and to priv levels. If they
-        * are different and to is in the kernel, then it indicates
-        * a ring transition. If the from instruction is not a ring
-        * transition instr (syscall, sysenter, int), then it means
-        * it was an irq, trap or fault.
-        *
-        * we have no way of detecting kernel to kernel faults.
-        */
-       if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
-           && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
-               ret = X86_BR_IRQ;
-
-       /*
-        * the branch priv level is determined by the target, as
-        * is done by HW when LBR_SELECT is implemented
-        */
-       if (ret != X86_BR_NONE)
-               ret |= to_plm;
-
-       return ret;
-}
-
-/*
- * implement actual branch filter based on user demand.
- * Hardware may not exactly satisfy that request, thus
- * we need to inspect opcodes. Mismatched branches are
- * discarded. Therefore, the number of branches returned
- * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
- */
-static void
-intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
-{
-       u64 from, to;
-       int br_sel = cpuc->br_sel;
-       int i, j, type;
-       bool compress = false;
-
-       /* if sampling all branches, then nothing to filter */
-       if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
-               return;
-
-       for (i = 0; i < cpuc->lbr_stack.nr; i++) {
-
-               from = cpuc->lbr_entries[i].from;
-               to = cpuc->lbr_entries[i].to;
-
-               type = branch_type(from, to, cpuc->lbr_entries[i].abort);
-               if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
-                       if (cpuc->lbr_entries[i].in_tx)
-                               type |= X86_BR_IN_TX;
-                       else
-                               type |= X86_BR_NO_TX;
-               }
-
-               /* if type does not correspond, then discard */
-               if (type == X86_BR_NONE || (br_sel & type) != type) {
-                       cpuc->lbr_entries[i].from = 0;
-                       compress = true;
-               }
-       }
-
-       if (!compress)
-               return;
-
-       /* remove all entries with from=0 */
-       for (i = 0; i < cpuc->lbr_stack.nr; ) {
-               if (!cpuc->lbr_entries[i].from) {
-                       j = i;
-                       while (++j < cpuc->lbr_stack.nr)
-                               cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
-                       cpuc->lbr_stack.nr--;
-                       if (!cpuc->lbr_entries[i].from)
-                               continue;
-               }
-               i++;
-       }
-}
-
-/*
- * Map interface branch filters onto LBR filters
- */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
-                                               | LBR_IND_JMP | LBR_FAR,
-       /*
-        * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
-        */
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
-        LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
-       /*
-        * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
-        */
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
-};
-
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
-};
-
-static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
-       [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
-       [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
-       [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
-       [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
-       [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_FAR,
-       [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
-       [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
-       [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
-                                               | LBR_RETURN | LBR_CALL_STACK,
-       [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
-       [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
-};
-
-/* core */
-void __init intel_pmu_lbr_init_core(void)
-{
-       x86_pmu.lbr_nr     = 4;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
-
-       /*
-        * SW branch filter usage:
-        * - compensate for lack of HW filter
-        */
-       pr_cont("4-deep LBR, ");
-}
-
-/* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(void)
-{
-       x86_pmu.lbr_nr     = 16;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - work around the LBR_SEL errata (see above)
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR, but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("16-deep LBR, ");
-}
-
-/* sandy bridge */
-void __init intel_pmu_lbr_init_snb(void)
-{
-       x86_pmu.lbr_nr   = 16;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR, but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("16-deep LBR, ");
-}
-
-/* haswell */
-void intel_pmu_lbr_init_hsw(void)
-{
-       x86_pmu.lbr_nr   = 16;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
-
-       pr_cont("16-deep LBR, ");
-}
-
-/* skylake */
-__init void intel_pmu_lbr_init_skl(void)
-{
-       x86_pmu.lbr_nr   = 32;
-       x86_pmu.lbr_tos  = MSR_LBR_TOS;
-       x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
-
-       /*
-        * SW branch filter usage:
-        * - support syscall, sysret capture.
-        *   That requires LBR_FAR, but that means far
-        *   jmps need to be filtered out
-        */
-       pr_cont("32-deep LBR, ");
-}
-
-/* atom */
-void __init intel_pmu_lbr_init_atom(void)
-{
-       /*
-        * only models starting at stepping 10 seem
-        * to have an operational LBR which can freeze
-        * on PMU interrupt
-        */
-       if (boot_cpu_data.x86_model == 28
-           && boot_cpu_data.x86_mask < 10) {
-               pr_cont("LBR disabled due to erratum");
-               return;
-       }
-
-       x86_pmu.lbr_nr     = 8;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
-
-       /*
-        * SW branch filter usage:
-        * - compensate for lack of HW filter
-        */
-       pr_cont("8-deep LBR, ");
-}
-
-/* Knights Landing */
-void intel_pmu_lbr_init_knl(void)
-{
-       x86_pmu.lbr_nr     = 8;
-       x86_pmu.lbr_tos    = MSR_LBR_TOS;
-       x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-       x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
-
-       x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-       x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
-
-       pr_cont("8-deep LBR, ");
-}
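
The suppress-mode comment in intel_pmu_setup_hw_lbr_filter() above spells the required LBR_SELECT value out as (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK), while the code writes the shorter mask ^ x86_pmu.lbr_sel_mask. A minimal userspace sketch (not kernel code; the 0x1ff constant is only an assumption taken from the "first 9 bits" wording in that comment) checking that the two forms agree for every mask value:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed stand-in for LBR_SEL_MASK: "the first 9 bits" per the comment */
    #define LBR_SEL_MASK_SKETCH 0x1ffULL

    int main(void)
    {
            uint64_t mask;

            for (mask = 0; mask <= LBR_SEL_MASK_SKETCH; mask++) {
                    /* the spelled-out suppress-mode formula ... */
                    uint64_t spelled = (~mask & LBR_SEL_MASK_SKETCH) |
                                       (mask & ~LBR_SEL_MASK_SKETCH);
                    /* ... and the XOR shortcut used by the driver */
                    uint64_t xored = mask ^ LBR_SEL_MASK_SKETCH;

                    if (spelled != xored) {
                            printf("mismatch at mask=%#llx\n",
                                   (unsigned long long)mask);
                            return 1;
                    }
            }
            printf("XOR form matches the suppress-mode formula\n");
            return 0;
    }

Within the select bits the XOR simply inverts the requested mask, which is all suppress mode needs.
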
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
deleted file mode 100644 (file)
index c0bbd10..0000000
+++ /dev/null
@@ -1,1188 +0,0 @@
-/*
- * Intel(R) Processor Trace PMU driver for perf
- * Copyright (c) 2013-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * Intel PT is specified in the Intel Architecture Instruction Set Extensions
- * Programming Reference:
- * http://software.intel.com/en-us/intel-isa-extensions
- */
-
-#undef DEBUG
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-
-#include <asm/perf_event.h>
-#include <asm/insn.h>
-#include <asm/io.h>
-#include <asm/intel_pt.h>
-
-#include "perf_event.h"
-#include "intel_pt.h"
-
-static DEFINE_PER_CPU(struct pt, pt_ctx);
-
-static struct pt_pmu pt_pmu;
-
-enum cpuid_regs {
-       CR_EAX = 0,
-       CR_ECX,
-       CR_EDX,
-       CR_EBX
-};
-
-/*
- * Capabilities of Intel PT hardware, such as number of address bits or
- * supported output schemes, are cached and exported to userspace as "caps"
- * attribute group of pt pmu device
- * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
- * relevant bits together with intel_pt traces.
- *
- * These are necessary for both trace decoding (payloads_lip contains the address
- * width encoded in IP-related packets) and event configuration (bitmasks with
- * permitted values for certain bit fields).
- */
-#define PT_CAP(_n, _l, _r, _m)                                         \
-       [PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,        \
-                           .reg = _r, .mask = _m }
-
-static struct pt_cap_desc {
-       const char      *name;
-       u32             leaf;
-       u8              reg;
-       u32             mask;
-} pt_caps[] = {
-       PT_CAP(max_subleaf,             0, CR_EAX, 0xffffffff),
-       PT_CAP(cr3_filtering,           0, CR_EBX, BIT(0)),
-       PT_CAP(psb_cyc,                 0, CR_EBX, BIT(1)),
-       PT_CAP(mtc,                     0, CR_EBX, BIT(3)),
-       PT_CAP(topa_output,             0, CR_ECX, BIT(0)),
-       PT_CAP(topa_multiple_entries,   0, CR_ECX, BIT(1)),
-       PT_CAP(single_range_output,     0, CR_ECX, BIT(2)),
-       PT_CAP(payloads_lip,            0, CR_ECX, BIT(31)),
-       PT_CAP(mtc_periods,             1, CR_EAX, 0xffff0000),
-       PT_CAP(cycle_thresholds,        1, CR_EBX, 0xffff),
-       PT_CAP(psb_periods,             1, CR_EBX, 0xffff0000),
-};
-
-static u32 pt_cap_get(enum pt_capabilities cap)
-{
-       struct pt_cap_desc *cd = &pt_caps[cap];
-       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
-       unsigned int shift = __ffs(cd->mask);
-
-       return (c & cd->mask) >> shift;
-}
-
-static ssize_t pt_cap_show(struct device *cdev,
-                          struct device_attribute *attr,
-                          char *buf)
-{
-       struct dev_ext_attribute *ea =
-               container_of(attr, struct dev_ext_attribute, attr);
-       enum pt_capabilities cap = (long)ea->var;
-
-       return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
-}
-
-static struct attribute_group pt_cap_group = {
-       .name   = "caps",
-};
-
-PMU_FORMAT_ATTR(cyc,           "config:1"      );
-PMU_FORMAT_ATTR(mtc,           "config:9"      );
-PMU_FORMAT_ATTR(tsc,           "config:10"     );
-PMU_FORMAT_ATTR(noretcomp,     "config:11"     );
-PMU_FORMAT_ATTR(mtc_period,    "config:14-17"  );
-PMU_FORMAT_ATTR(cyc_thresh,    "config:19-22"  );
-PMU_FORMAT_ATTR(psb_period,    "config:24-27"  );
-
-static struct attribute *pt_formats_attr[] = {
-       &format_attr_cyc.attr,
-       &format_attr_mtc.attr,
-       &format_attr_tsc.attr,
-       &format_attr_noretcomp.attr,
-       &format_attr_mtc_period.attr,
-       &format_attr_cyc_thresh.attr,
-       &format_attr_psb_period.attr,
-       NULL,
-};
-
-static struct attribute_group pt_format_group = {
-       .name   = "format",
-       .attrs  = pt_formats_attr,
-};
-
-static const struct attribute_group *pt_attr_groups[] = {
-       &pt_cap_group,
-       &pt_format_group,
-       NULL,
-};
-
-static int __init pt_pmu_hw_init(void)
-{
-       struct dev_ext_attribute *de_attrs;
-       struct attribute **attrs;
-       size_t size;
-       int ret;
-       long i;
-
-       attrs = NULL;
-
-       for (i = 0; i < PT_CPUID_LEAVES; i++) {
-               cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
-                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
-       }
-
-       ret = -ENOMEM;
-       size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
-       attrs = kzalloc(size, GFP_KERNEL);
-       if (!attrs)
-               goto fail;
-
-       size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
-       de_attrs = kzalloc(size, GFP_KERNEL);
-       if (!de_attrs)
-               goto fail;
-
-       for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
-               struct dev_ext_attribute *de_attr = de_attrs + i;
-
-               de_attr->attr.attr.name = pt_caps[i].name;
-
-               sysfs_attr_init(&de_attr->attr.attr);
-
-               de_attr->attr.attr.mode         = S_IRUGO;
-               de_attr->attr.show              = pt_cap_show;
-               de_attr->var                    = (void *)i;
-
-               attrs[i] = &de_attr->attr.attr;
-       }
-
-       pt_cap_group.attrs = attrs;
-
-       return 0;
-
-fail:
-       kfree(attrs);
-
-       return ret;
-}
-
-#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC    | \
-                         RTIT_CTL_CYC_THRESH   | \
-                         RTIT_CTL_PSB_FREQ)
-
-#define RTIT_CTL_MTC   (RTIT_CTL_MTC_EN        | \
-                        RTIT_CTL_MTC_RANGE)
-
-#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN                | \
-                       RTIT_CTL_DISRETC        | \
-                       RTIT_CTL_CYC_PSB        | \
-                       RTIT_CTL_MTC)
-
-static bool pt_event_valid(struct perf_event *event)
-{
-       u64 config = event->attr.config;
-       u64 allowed, requested;
-
-       if ((config & PT_CONFIG_MASK) != config)
-               return false;
-
-       if (config & RTIT_CTL_CYC_PSB) {
-               if (!pt_cap_get(PT_CAP_psb_cyc))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_psb_periods);
-               requested = (config & RTIT_CTL_PSB_FREQ) >>
-                       RTIT_CTL_PSB_FREQ_OFFSET;
-               if (requested && (!(allowed & BIT(requested))))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_cycle_thresholds);
-               requested = (config & RTIT_CTL_CYC_THRESH) >>
-                       RTIT_CTL_CYC_THRESH_OFFSET;
-               if (requested && (!(allowed & BIT(requested))))
-                       return false;
-       }
-
-       if (config & RTIT_CTL_MTC) {
-               /*
-                * In the unlikely case that CPUID lists valid mtc periods,
-                * but not the mtc capability, drop out here.
-                *
-                * Spec says that setting mtc period bits while mtc bit in
-                * CPUID is 0 will #GP, so better safe than sorry.
-                */
-               if (!pt_cap_get(PT_CAP_mtc))
-                       return false;
-
-               allowed = pt_cap_get(PT_CAP_mtc_periods);
-               if (!allowed)
-                       return false;
-
-               requested = (config & RTIT_CTL_MTC_RANGE) >>
-                       RTIT_CTL_MTC_RANGE_OFFSET;
-
-               if (!(allowed & BIT(requested)))
-                       return false;
-       }
-
-       return true;
-}
-
-/*
- * PT configuration helpers
- * These all are cpu affine and operate on a local PT
- */
-
-static void pt_config(struct perf_event *event)
-{
-       u64 reg;
-
-       if (!event->hw.itrace_started) {
-               event->hw.itrace_started = 1;
-               wrmsrl(MSR_IA32_RTIT_STATUS, 0);
-       }
-
-       reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
-
-       if (!event->attr.exclude_kernel)
-               reg |= RTIT_CTL_OS;
-       if (!event->attr.exclude_user)
-               reg |= RTIT_CTL_USR;
-
-       reg |= (event->attr.config & PT_CONFIG_MASK);
-
-       wrmsrl(MSR_IA32_RTIT_CTL, reg);
-}
-
-static void pt_config_start(bool start)
-{
-       u64 ctl;
-
-       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-       if (start)
-               ctl |= RTIT_CTL_TRACEEN;
-       else
-               ctl &= ~RTIT_CTL_TRACEEN;
-       wrmsrl(MSR_IA32_RTIT_CTL, ctl);
-
-       /*
-        * A wrmsr that disables trace generation serializes other PT
-        * registers and causes all data packets to be written to memory,
-        * but a fence is required for the data to become globally visible.
-        *
-        * The below WMB, separating data store and aux_head store matches
-        * the consumer's RMB that separates aux_head load and data load.
-        */
-       if (!start)
-               wmb();
-}
-
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
-                            unsigned int output_off)
-{
-       u64 reg;
-
-       wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
-       reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
-       wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
-/*
- * Keep ToPA table-related metadata on the same page as the actual table,
- * taking up a few words from the top
- */
-
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
-
-/**
- * struct topa - page-sized ToPA table with metadata at the top
- * @table:     actual ToPA table entries, as understood by PT hardware
- * @list:      linkage to struct pt_buffer's list of tables
- * @phys:      physical address of this page
- * @offset:    offset of the first entry in this table in the buffer
- * @size:      total size of all entries in this table
- * @last:      index of the last initialized entry in this table
- */
-struct topa {
-       struct topa_entry       table[TENTS_PER_PAGE];
-       struct list_head        list;
-       u64                     phys;
-       u64                     offset;
-       size_t                  size;
-       int                     last;
-};
-
-/* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
-
-/**
- * topa_alloc() - allocate page-sized ToPA table
- * @cpu:       CPU on which to allocate.
- * @gfp:       Allocation flags.
- *
- * Return:     On success, return the pointer to ToPA table page.
- */
-static struct topa *topa_alloc(int cpu, gfp_t gfp)
-{
-       int node = cpu_to_node(cpu);
-       struct topa *topa;
-       struct page *p;
-
-       p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
-       if (!p)
-               return NULL;
-
-       topa = page_address(p);
-       topa->last = 0;
-       topa->phys = page_to_phys(p);
-
-       /*
-        * In case of single-entry ToPA, always put the self-referencing END
-        * link as the 2nd entry in the table
-        */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
-               TOPA_ENTRY(topa, 1)->end = 1;
-       }
-
-       return topa;
-}
-
-/**
- * topa_free() - free a page-sized ToPA table
- * @topa:      Table to deallocate.
- */
-static void topa_free(struct topa *topa)
-{
-       free_page((unsigned long)topa);
-}
-
-/**
- * topa_insert_table() - insert a ToPA table into a buffer
- * @buf:        PT buffer that's being extended.
- * @topa:       New topa table to be inserted.
- *
- * If it's the first table in this buffer, set up buffer's pointers
- * accordingly; otherwise, add an END=1 link entry pointing to @topa to the current
- * "last" table and adjust the last table pointer to @topa.
- */
-static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
-{
-       struct topa *last = buf->last;
-
-       list_add_tail(&topa->list, &buf->tables);
-
-       if (!buf->first) {
-               buf->first = buf->last = buf->cur = topa;
-               return;
-       }
-
-       topa->offset = last->offset + last->size;
-       buf->last = topa;
-
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return;
-
-       BUG_ON(last->last != TENTS_PER_PAGE - 1);
-
-       TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
-       TOPA_ENTRY(last, -1)->end = 1;
-}
-
-/**
- * topa_table_full() - check if a ToPA table is filled up
- * @topa:      ToPA table.
- */
-static bool topa_table_full(struct topa *topa)
-{
-       /* single-entry ToPA is a special case */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return !!topa->last;
-
-       return topa->last == TENTS_PER_PAGE - 1;
-}
-
-/**
- * topa_insert_pages() - create a list of ToPA tables
- * @buf:       PT buffer being initialized.
- * @gfp:       Allocation flags.
- *
- * This initializes a list of ToPA tables with entries from
- * the data_pages provided by rb_alloc_aux().
- *
- * Return:     0 on success or error code.
- */
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
-{
-       struct topa *topa = buf->last;
-       int order = 0;
-       struct page *p;
-
-       p = virt_to_page(buf->data_pages[buf->nr_pages]);
-       if (PagePrivate(p))
-               order = page_private(p);
-
-       if (topa_table_full(topa)) {
-               topa = topa_alloc(buf->cpu, gfp);
-               if (!topa)
-                       return -ENOMEM;
-
-               topa_insert_table(buf, topa);
-       }
-
-       TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
-       TOPA_ENTRY(topa, -1)->size = order;
-       if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(topa, -1)->intr = 1;
-               TOPA_ENTRY(topa, -1)->stop = 1;
-       }
-
-       topa->last++;
-       topa->size += sizes(order);
-
-       buf->nr_pages += 1ul << order;
-
-       return 0;
-}
-
-/**
- * pt_topa_dump() - print ToPA tables and their entries
- * @buf:       PT buffer.
- */
-static void pt_topa_dump(struct pt_buffer *buf)
-{
-       struct topa *topa;
-
-       list_for_each_entry(topa, &buf->tables, list) {
-               int i;
-
-               pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
-                        topa->phys, topa->offset, topa->size);
-               for (i = 0; i < TENTS_PER_PAGE; i++) {
-                       pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
-                                &topa->table[i],
-                                (unsigned long)topa->table[i].base << TOPA_SHIFT,
-                                sizes(topa->table[i].size),
-                                topa->table[i].end ?  'E' : ' ',
-                                topa->table[i].intr ? 'I' : ' ',
-                                topa->table[i].stop ? 'S' : ' ',
-                                *(u64 *)&topa->table[i]);
-                       if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
-                            topa->table[i].stop) ||
-                           topa->table[i].end)
-                               break;
-               }
-       }
-}
-
-/**
- * pt_buffer_advance() - advance to the next output region
- * @buf:       PT buffer.
- *
- * Advance the current pointers in the buffer to the next ToPA entry.
- */
-static void pt_buffer_advance(struct pt_buffer *buf)
-{
-       buf->output_off = 0;
-       buf->cur_idx++;
-
-       if (buf->cur_idx == buf->cur->last) {
-               if (buf->cur == buf->last)
-                       buf->cur = buf->first;
-               else
-                       buf->cur = list_entry(buf->cur->list.next, struct topa,
-                                             list);
-               buf->cur_idx = 0;
-       }
-}
-
-/**
- * pt_update_head() - calculate current offsets and sizes
- * @pt:                Per-cpu pt context.
- *
- * Update buffer's current write pointer position and data size.
- */
-static void pt_update_head(struct pt *pt)
-{
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-       u64 topa_idx, base, old;
-
-       /* offset of the first region in this table from the beginning of buf */
-       base = buf->cur->offset + buf->output_off;
-
-       /* offset of the current output region within this table */
-       for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
-               base += sizes(buf->cur->table[topa_idx].size);
-
-       if (buf->snapshot) {
-               local_set(&buf->data_size, base);
-       } else {
-               old = (local64_xchg(&buf->head, base) &
-                      ((buf->nr_pages << PAGE_SHIFT) - 1));
-               if (base < old)
-                       base += buf->nr_pages << PAGE_SHIFT;
-
-               local_add(base - old, &buf->data_size);
-       }
-}
-
-/**
- * pt_buffer_region() - obtain current output region's address
- * @buf:       PT buffer.
- */
-static void *pt_buffer_region(struct pt_buffer *buf)
-{
-       return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
-}
-
-/**
- * pt_buffer_region_size() - obtain current output region's size
- * @buf:       PT buffer.
- */
-static size_t pt_buffer_region_size(struct pt_buffer *buf)
-{
-       return sizes(buf->cur->table[buf->cur_idx].size);
-}
-
-/**
- * pt_handle_status() - take care of possible status conditions
- * @pt:                Per-cpu pt context.
- */
-static void pt_handle_status(struct pt *pt)
-{
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-       int advance = 0;
-       u64 status;
-
-       rdmsrl(MSR_IA32_RTIT_STATUS, status);
-
-       if (status & RTIT_STATUS_ERROR) {
-               pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
-               pt_topa_dump(buf);
-               status &= ~RTIT_STATUS_ERROR;
-       }
-
-       if (status & RTIT_STATUS_STOPPED) {
-               status &= ~RTIT_STATUS_STOPPED;
-
-               /*
-                * On systems that only do single-entry ToPA, hitting STOP
-                * means we are already losing data; need to let the decoder
-                * know.
-                */
-               if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
-                   buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
-                       local_inc(&buf->lost);
-                       advance++;
-               }
-       }
-
-       /*
-        * Also, on single-entry ToPA implementations, the interrupt will come
-        * before the output reaches its output region's boundary.
-        */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
-           pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
-               void *head = pt_buffer_region(buf);
-
-               /* everything within this margin needs to be zeroed out */
-               memset(head + buf->output_off, 0,
-                      pt_buffer_region_size(buf) -
-                      buf->output_off);
-               advance++;
-       }
-
-       if (advance)
-               pt_buffer_advance(buf);
-
-       wrmsrl(MSR_IA32_RTIT_STATUS, status);
-}
-
-/**
- * pt_read_offset() - translate registers into buffer pointers
- * @buf:       PT buffer.
- *
- * Set buffer's output pointers from MSR values.
- */
-static void pt_read_offset(struct pt_buffer *buf)
-{
-       u64 offset, base_topa;
-
-       rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
-       buf->cur = phys_to_virt(base_topa);
-
-       rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
-       /* offset within current output region */
-       buf->output_off = offset >> 32;
-       /* index of current output region within this table */
-       buf->cur_idx = (offset & 0xffffff80) >> 7;
-}
-
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf:       PT buffer.
- * @pg:                Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
-{
-       struct topa_entry *te = buf->topa_index[pg];
-
-       /* one region */
-       if (buf->first == buf->last && buf->first->last == 1)
-               return pg;
-
-       do {
-               pg++;
-               pg &= buf->nr_pages - 1;
-       } while (buf->topa_index[pg] == te);
-
-       return pg;
-}
-
-/**
- * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
- * @buf:       PT buffer.
- * @handle:    Current output handle.
- *
- * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected, and to wake up the consumer after a certain fraction of
- * the buffer has filled up. Only needed and sensible for non-snapshot counters.
- *
- * This obviously relies on buf::head to figure out buffer markers, so it has
- * to be called after pt_buffer_reset_offsets() and before the hardware tracing
- * is enabled.
- */
-static int pt_buffer_reset_markers(struct pt_buffer *buf,
-                                  struct perf_output_handle *handle)
-
-{
-       unsigned long head = local64_read(&buf->head);
-       unsigned long idx, npages, wakeup;
-
-       /* can't stop in the middle of an output region */
-       if (buf->output_off + handle->size + 1 <
-           sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
-               return -EINVAL;
-
-
-       /* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               return 0;
-
-       /* clear STOP and INT from current entry */
-       buf->topa_index[buf->stop_pos]->stop = 0;
-       buf->topa_index[buf->intr_pos]->intr = 0;
-
-       /* how many pages till the STOP marker */
-       npages = handle->size >> PAGE_SHIFT;
-
-       /* if it's on a page boundary, fill up one more page */
-       if (!offset_in_page(head + handle->size + 1))
-               npages++;
-
-       idx = (head >> PAGE_SHIFT) + npages;
-       idx &= buf->nr_pages - 1;
-       buf->stop_pos = idx;
-
-       wakeup = handle->wakeup >> PAGE_SHIFT;
-
-       /* in the worst case, wake up the consumer one page before hard stop */
-       idx = (head >> PAGE_SHIFT) + npages - 1;
-       if (idx > wakeup)
-               idx = wakeup;
-
-       idx &= buf->nr_pages - 1;
-       buf->intr_pos = idx;
-
-       buf->topa_index[buf->stop_pos]->stop = 1;
-       buf->topa_index[buf->intr_pos]->intr = 1;
-
-       return 0;
-}
-
-/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf:       PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
-       struct topa *cur = buf->first, *prev = buf->last;
-       struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
-               *te_prev = TOPA_ENTRY(prev, prev->last - 1);
-       int pg = 0, idx = 0;
-
-       while (pg < buf->nr_pages) {
-               int tidx;
-
-               /* pages within one topa entry */
-               for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
-                       buf->topa_index[pg] = te_prev;
-
-               te_prev = te_cur;
-
-               if (idx == cur->last - 1) {
-                       /* advance to next topa table */
-                       idx = 0;
-                       cur = list_entry(cur->list.next, struct topa, list);
-               } else {
-                       idx++;
-               }
-               te_cur = TOPA_ENTRY(cur, idx);
-       }
-
-}
-
-/**
- * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
- * @buf:       PT buffer.
- * @head:      Write pointer (aux_head) from AUX buffer.
- *
- * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly. This is done after we have obtained the
- * current aux_head position from a successful call to perf_aux_output_begin()
- * to make sure the hardware is writing to the right place.
- *
- * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
- * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
- * which are used to determine INT and STOP markers' locations by a subsequent
- * call to pt_buffer_reset_markers().
- */
-static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
-{
-       int pg;
-
-       if (buf->snapshot)
-               head &= (buf->nr_pages << PAGE_SHIFT) - 1;
-
-       pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
-       pg = pt_topa_next_entry(buf, pg);
-
-       buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
-       buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
-                       (unsigned long)buf->cur) / sizeof(struct topa_entry);
-       buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
-
-       local64_set(&buf->head, head);
-       local_set(&buf->data_size, 0);
-}
-
-/**
- * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
- * @buf:       PT buffer.
- */
-static void pt_buffer_fini_topa(struct pt_buffer *buf)
-{
-       struct topa *topa, *iter;
-
-       list_for_each_entry_safe(topa, iter, &buf->tables, list) {
-               /*
-                * right now, this is in the free_aux() path only, so
-                * there is no need to unlink this table from the list
-                */
-               topa_free(topa);
-       }
-}
-
-/**
- * pt_buffer_init_topa() - initialize ToPA table for pt buffer
- * @buf:       PT buffer.
- * @size:      Total size of all regions within this ToPA.
- * @gfp:       Allocation flags.
- */
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
-                              gfp_t gfp)
-{
-       struct topa *topa;
-       int err;
-
-       topa = topa_alloc(buf->cpu, gfp);
-       if (!topa)
-               return -ENOMEM;
-
-       topa_insert_table(buf, topa);
-
-       while (buf->nr_pages < nr_pages) {
-               err = topa_insert_pages(buf, gfp);
-               if (err) {
-                       pt_buffer_fini_topa(buf);
-                       return -ENOMEM;
-               }
-       }
-
-       pt_buffer_setup_topa_index(buf);
-
-       /* link last table to the first one, unless we're double buffering */
-       if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
-               TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
-               TOPA_ENTRY(buf->last, -1)->end = 1;
-       }
-
-       pt_topa_dump(buf);
-       return 0;
-}
-
-/**
- * pt_buffer_setup_aux() - set up topa tables for a PT buffer
- * @cpu:       CPU on which to allocate; -1 means the current CPU.
- * @pages:     Array of pointers to buffer pages passed from perf core.
- * @nr_pages:  Number of pages in the buffer.
- * @snapshot:  If this is a snapshot/overwrite counter.
- *
- * This is a pmu::setup_aux callback that sets up ToPA tables and all the
- * bookkeeping for an AUX buffer.
- *
- * Return:     Our private PT buffer structure.
- */
-static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
-{
-       struct pt_buffer *buf;
-       int node, ret;
-
-       if (!nr_pages)
-               return NULL;
-
-       if (cpu == -1)
-               cpu = raw_smp_processor_id();
-       node = cpu_to_node(cpu);
-
-       buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
-                          GFP_KERNEL, node);
-       if (!buf)
-               return NULL;
-
-       buf->cpu = cpu;
-       buf->snapshot = snapshot;
-       buf->data_pages = pages;
-
-       INIT_LIST_HEAD(&buf->tables);
-
-       ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
-       if (ret) {
-               kfree(buf);
-               return NULL;
-       }
-
-       return buf;
-}
-
-/**
- * pt_buffer_free_aux() - perf AUX deallocation path callback
- * @data:      PT buffer.
- */
-static void pt_buffer_free_aux(void *data)
-{
-       struct pt_buffer *buf = data;
-
-       pt_buffer_fini_topa(buf);
-       kfree(buf);
-}
-
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf:       PT buffer.
- * @pt:                Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
-{
-       if (buf->snapshot)
-               return false;
-
-       if (local_read(&buf->data_size) >= pt->handle.size)
-               return true;
-
-       return false;
-}
-
-/**
- * intel_pt_interrupt() - PT PMI handler
- */
-void intel_pt_interrupt(void)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf;
-       struct perf_event *event = pt->handle.event;
-
-       /*
-        * There may be a dangling PT bit in the interrupt status register
-        * after PT has been disabled by pt_event_stop(). Make sure we don't
-        * do anything (particularly, re-enable) for this event here.
-        */
-       if (!ACCESS_ONCE(pt->handle_nmi))
-               return;
-
-       pt_config_start(false);
-
-       if (!event)
-               return;
-
-       buf = perf_get_aux(&pt->handle);
-       if (!buf)
-               return;
-
-       pt_read_offset(buf);
-
-       pt_handle_status(pt);
-
-       pt_update_head(pt);
-
-       perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-                           local_xchg(&buf->lost, 0));
-
-       if (!event->hw.state) {
-               int ret;
-
-               buf = perf_aux_output_begin(&pt->handle, event);
-               if (!buf) {
-                       event->hw.state = PERF_HES_STOPPED;
-                       return;
-               }
-
-               pt_buffer_reset_offsets(buf, pt->handle.head);
-               /* snapshot counters don't use PMI, so it's safe */
-               ret = pt_buffer_reset_markers(buf, &pt->handle);
-               if (ret) {
-                       perf_aux_output_end(&pt->handle, 0, true);
-                       return;
-               }
-
-               pt_config_buffer(buf->cur->table, buf->cur_idx,
-                                buf->output_off);
-               pt_config(event);
-       }
-}
-
-/*
- * PMU callbacks
- */
-
-static void pt_event_start(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
-       if (!buf || pt_buffer_is_full(buf, pt)) {
-               event->hw.state = PERF_HES_STOPPED;
-               return;
-       }
-
-       ACCESS_ONCE(pt->handle_nmi) = 1;
-       event->hw.state = 0;
-
-       pt_config_buffer(buf->cur->table, buf->cur_idx,
-                        buf->output_off);
-       pt_config(event);
-}
-
-static void pt_event_stop(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-
-       /*
-        * Protect against the PMI racing with disabling wrmsr,
-        * see comment in intel_pt_interrupt().
-        */
-       ACCESS_ONCE(pt->handle_nmi) = 0;
-       pt_config_start(false);
-
-       if (event->hw.state == PERF_HES_STOPPED)
-               return;
-
-       event->hw.state = PERF_HES_STOPPED;
-
-       if (mode & PERF_EF_UPDATE) {
-               struct pt_buffer *buf = perf_get_aux(&pt->handle);
-
-               if (!buf)
-                       return;
-
-               if (WARN_ON_ONCE(pt->handle.event != event))
-                       return;
-
-               pt_read_offset(buf);
-
-               pt_handle_status(pt);
-
-               pt_update_head(pt);
-       }
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct pt_buffer *buf;
-
-       pt_event_stop(event, PERF_EF_UPDATE);
-
-       buf = perf_get_aux(&pt->handle);
-
-       if (buf) {
-               if (buf->snapshot)
-                       pt->handle.head =
-                               local_xchg(&buf->data_size,
-                                          buf->nr_pages << PAGE_SHIFT);
-               perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
-                                   local_xchg(&buf->lost, 0));
-       }
-}
-
-static int pt_event_add(struct perf_event *event, int mode)
-{
-       struct pt_buffer *buf;
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-       struct hw_perf_event *hwc = &event->hw;
-       int ret = -EBUSY;
-
-       if (pt->handle.event)
-               goto fail;
-
-       buf = perf_aux_output_begin(&pt->handle, event);
-       ret = -EINVAL;
-       if (!buf)
-               goto fail_stop;
-
-       pt_buffer_reset_offsets(buf, pt->handle.head);
-       if (!buf->snapshot) {
-               ret = pt_buffer_reset_markers(buf, &pt->handle);
-               if (ret)
-                       goto fail_end_stop;
-       }
-
-       if (mode & PERF_EF_START) {
-               pt_event_start(event, 0);
-               ret = -EBUSY;
-               if (hwc->state == PERF_HES_STOPPED)
-                       goto fail_end_stop;
-       } else {
-               hwc->state = PERF_HES_STOPPED;
-       }
-
-       return 0;
-
-fail_end_stop:
-       perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
-       hwc->state = PERF_HES_STOPPED;
-fail:
-       return ret;
-}
-
-static void pt_event_read(struct perf_event *event)
-{
-}
-
-static void pt_event_destroy(struct perf_event *event)
-{
-       x86_del_exclusive(x86_lbr_exclusive_pt);
-}
-
-static int pt_event_init(struct perf_event *event)
-{
-       if (event->attr.type != pt_pmu.pmu.type)
-               return -ENOENT;
-
-       if (!pt_event_valid(event))
-               return -EINVAL;
-
-       if (x86_add_exclusive(x86_lbr_exclusive_pt))
-               return -EBUSY;
-
-       event->destroy = pt_event_destroy;
-
-       return 0;
-}
-
-void cpu_emergency_stop_pt(void)
-{
-       struct pt *pt = this_cpu_ptr(&pt_ctx);
-
-       if (pt->handle.event)
-               pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
-}
-
-static __init int pt_init(void)
-{
-       int ret, cpu, prior_warn = 0;
-
-       BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
-
-       if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
-               return -ENODEV;
-
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               u64 ctl;
-
-               ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
-               if (!ret && (ctl & RTIT_CTL_TRACEEN))
-                       prior_warn++;
-       }
-       put_online_cpus();
-
-       if (prior_warn) {
-               x86_add_exclusive(x86_lbr_exclusive_pt);
-               pr_warn("PT is enabled at boot time, doing nothing\n");
-
-               return -EBUSY;
-       }
-
-       ret = pt_pmu_hw_init();
-       if (ret)
-               return ret;
-
-       if (!pt_cap_get(PT_CAP_topa_output)) {
-               pr_warn("ToPA output is not supported on this CPU\n");
-               return -ENODEV;
-       }
-
-       if (!pt_cap_get(PT_CAP_topa_multiple_entries))
-               pt_pmu.pmu.capabilities =
-                       PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
-
-       pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-       pt_pmu.pmu.attr_groups  = pt_attr_groups;
-       pt_pmu.pmu.task_ctx_nr  = perf_sw_context;
-       pt_pmu.pmu.event_init   = pt_event_init;
-       pt_pmu.pmu.add          = pt_event_add;
-       pt_pmu.pmu.del          = pt_event_del;
-       pt_pmu.pmu.start        = pt_event_start;
-       pt_pmu.pmu.stop         = pt_event_stop;
-       pt_pmu.pmu.read         = pt_event_read;
-       pt_pmu.pmu.setup_aux    = pt_buffer_setup_aux;
-       pt_pmu.pmu.free_aux     = pt_buffer_free_aux;
-       ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
-
-       return ret;
-}
-arch_initcall(pt_init);
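
For reference, pt_read_offset() above and pt_config_buffer() earlier in the file agree on one packing of MSR_IA32_RTIT_OUTPUT_MASK: 0x7f in the low bits, the ToPA entry index in bits 31:7, and the byte offset within the current output region in bits 63:32. A small standalone sketch of that round trip (userspace C with an arbitrary example value; not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* pack the way pt_config_buffer() does: 0x7f | idx << 7 | off << 32 */
    static uint64_t pack_output_mask(uint64_t topa_idx, uint64_t output_off)
    {
            return 0x7fULL | (topa_idx << 7) | (output_off << 32);
    }

    int main(void)
    {
            /* arbitrary example: entry 3, 0x120 bytes into the region */
            uint64_t reg = pack_output_mask(3, 0x120);

            /* unpack the way pt_read_offset() does */
            uint64_t output_off = reg >> 32;
            uint64_t topa_idx   = (reg & 0xffffff80ULL) >> 7;

            printf("topa_idx=%llu output_off=%#llx\n",
                   (unsigned long long)topa_idx,
                   (unsigned long long)output_off);
            return 0;
    }

The 0x7f in the low bits only mirrors what pt_config_buffer() writes; its hardware meaning is not relied on here.
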
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
deleted file mode 100644 (file)
index 24a351a..0000000
+++ /dev/null
@@ -1,783 +0,0 @@
-/*
- * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters
- * Copyright (C) 2013 Google, Inc., Stephane Eranian
- *
- * Intel RAPL interface is specified in the IA-32 Manual Vol3b
- * section 14.7.1 (September 2013)
- *
- * RAPL provides more controls than just reporting energy consumption;
- * however, here we only expose the free-running energy consumption
- * counters (pp0, pkg, dram, gpu).
- *
- * Each of those counters increments in a power unit defined by the
- * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
- * but it can vary.
- *
- * Counter to rapl events mappings:
- *
- *  pp0 counter: consumption of all physical cores (power plane 0)
- *       event: rapl_energy_cores
- *    perf code: 0x1
- *
- *  pkg counter: consumption of the whole processor package
- *       event: rapl_energy_pkg
- *    perf code: 0x2
- *
- * dram counter: consumption of the dram domain (servers only)
- *       event: rapl_energy_dram
- *    perf code: 0x3
- *
- *  gpu counter: consumption of the builtin-gpu domain (client only)
- *       event: rapl_energy_gpu
- *    perf code: 0x4
- *
- * We manage those counters as free running (read-only). They may be
- * used simultaneously by other tools, such as turbostat.
- *
- * The events only support system-wide mode counting. There is no
- * sampling support because it does not make sense and is not
- * supported by the RAPL hardware.
- *
- * Because we want to avoid floating-point operations in the kernel,
- * the events are all reported in fixed point arithmetic (32.32).
- * Tools must adjust the counts to convert them to Watts using
- * the duration of the measurement. Tools may use a function such as
- * ldexp(raw_count, -32);
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/perf_event.h>
-#include <asm/cpu_device_id.h>
-#include "perf_event.h"
-
-/*
- * RAPL energy status counters
- */
-#define RAPL_IDX_PP0_NRG_STAT  0       /* all cores */
-#define INTEL_RAPL_PP0         0x1     /* pseudo-encoding */
-#define RAPL_IDX_PKG_NRG_STAT  1       /* entire package */
-#define INTEL_RAPL_PKG         0x2     /* pseudo-encoding */
-#define RAPL_IDX_RAM_NRG_STAT  2       /* DRAM */
-#define INTEL_RAPL_RAM         0x3     /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT  3       /* gpu */
-#define INTEL_RAPL_PP1         0x4     /* pseudo-encoding */
-
-#define NR_RAPL_DOMAINS         0x4
-static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
-       "pp0-core",
-       "package",
-       "dram",
-       "pp1-gpu",
-};
-
-/* Clients have PP0, PKG, PP1 */
-#define RAPL_IDX_CLN   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Servers have PP0, PKG, RAM */
-#define RAPL_IDX_SRV   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT)
-
-/* Haswell and later clients have PP0, PKG, RAM, PP1 */
-#define RAPL_IDX_HSW   (1<<RAPL_IDX_PP0_NRG_STAT|\
-                        1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT|\
-                        1<<RAPL_IDX_PP1_NRG_STAT)
-
-/* Knights Landing has PKG, RAM */
-#define RAPL_IDX_KNL   (1<<RAPL_IDX_PKG_NRG_STAT|\
-                        1<<RAPL_IDX_RAM_NRG_STAT)
-
-/*
- * event code: LSB 8 bits, passed in attr->config
- * any other bit is reserved
- */
-#define RAPL_EVENT_MASK        0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)          \
-static ssize_t __rapl_##_var##_show(struct kobject *kobj,      \
-                               struct kobj_attribute *attr,    \
-                               char *page)                     \
-{                                                              \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);             \
-       return sprintf(page, _format "\n");                     \
-}                                                              \
-static struct kobj_attribute format_attr_##_var =              \
-       __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
-#define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */
-
-#define RAPL_EVENT_ATTR_STR(_name, v, str)                                     \
-static struct perf_pmu_events_attr event_attr_##v = {                          \
-       .attr           = __ATTR(_name, 0444, perf_event_sysfs_show, NULL),     \
-       .id             = 0,                                                    \
-       .event_str      = str,                                                  \
-};
-
-struct rapl_pmu {
-       spinlock_t       lock;
-       int              n_active; /* number of active events */
-       struct list_head active_list;
-       struct pmu       *pmu; /* pointer to rapl_pmu_class */
-       ktime_t          timer_interval; /* in ktime_t unit */
-       struct hrtimer   hrtimer;
-};
-
-static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;  /* 1/2^hw_unit Joule */
-static struct pmu rapl_pmu_class;
-static cpumask_t rapl_cpu_mask;
-static int rapl_cntr_mask;
-
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu);
-static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free);
-
-static struct x86_pmu_quirk *rapl_quirks;
-static inline u64 rapl_read_counter(struct perf_event *event)
-{
-       u64 raw;
-       rdmsrl(event->hw.event_base, raw);
-       return raw;
-}
-
-#define rapl_add_quirk(func_)                                          \
-do {                                                                   \
-       static struct x86_pmu_quirk __quirk __initdata = {              \
-               .func = func_,                                          \
-       };                                                              \
-       __quirk.next = rapl_quirks;                                     \
-       rapl_quirks = &__quirk;                                         \
-} while (0)
-
-static inline u64 rapl_scale(u64 v, int cfg)
-{
-       if (cfg > NR_RAPL_DOMAINS) {
-               pr_warn("invalid domain %d, failed to scale data\n", cfg);
-               return v;
-       }
-       /*
-        * scale delta to the smallest unit (2^-32 Joules).
-        * users must then scale back: count * 2^-32 to get Joules,
-        * or use ldexp(count, -32).
-        * Watts = Joules / time delta
-        */
-       return v << (32 - rapl_hw_unit[cfg - 1]);
-}
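/*
 * Illustrative arithmetic (editorial note): with the Haswell server DRAM
 * quirk below setting rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] to 16, a raw
 * delta of 1 is scaled to 1 << (32 - 16) = 65536 units of 2^-32 J,
 * i.e. 2^-16 J, or roughly 15.3 microjoules per raw increment.
 */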
-
-static u64 rapl_event_update(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 prev_raw_count, new_raw_count;
-       s64 delta, sdelta;
-       int shift = RAPL_CNTR_WIDTH;
-
-again:
-       prev_raw_count = local64_read(&hwc->prev_count);
-       rdmsrl(event->hw.event_base, new_raw_count);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                           new_raw_count) != prev_raw_count) {
-               cpu_relax();
-               goto again;
-       }
-
-       /*
-        * Now we have the new raw value and have updated the prev
-        * timestamp already. We can now calculate the elapsed delta
-        * (event-)time and add that to the generic event.
-        *
-        * Careful, not all hw sign-extends above the physical width
-        * of the count.
-        */
-       delta = (new_raw_count << shift) - (prev_raw_count << shift);
-       delta >>= shift;
-
-       sdelta = rapl_scale(delta, event->hw.config);
-
-       local64_add(sdelta, &event->count);
-
-       return new_raw_count;
-}
-
-static void rapl_start_hrtimer(struct rapl_pmu *pmu)
-{
-       hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
-                    HRTIMER_MODE_REL_PINNED);
-}
-
-static void rapl_stop_hrtimer(struct rapl_pmu *pmu)
-{
-       hrtimer_cancel(&pmu->hrtimer);
-}
-
-static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct perf_event *event;
-       unsigned long flags;
-
-       if (!pmu->n_active)
-               return HRTIMER_NORESTART;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       list_for_each_entry(event, &pmu->active_list, active_entry) {
-               rapl_event_update(event);
-       }
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-
-       hrtimer_forward_now(hrtimer, pmu->timer_interval);
-
-       return HRTIMER_RESTART;
-}
-
-static void rapl_hrtimer_init(struct rapl_pmu *pmu)
-{
-       struct hrtimer *hr = &pmu->hrtimer;
-
-       hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       hr->function = rapl_hrtimer_handle;
-}
-
-static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
-                                  struct perf_event *event)
-{
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       event->hw.state = 0;
-
-       list_add_tail(&event->active_entry, &pmu->active_list);
-
-       local64_set(&event->hw.prev_count, rapl_read_counter(event));
-
-       pmu->n_active++;
-       if (pmu->n_active == 1)
-               rapl_start_hrtimer(pmu);
-}
-
-static void rapl_pmu_event_start(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-       __rapl_pmu_event_start(pmu, event);
-       spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static void rapl_pmu_event_stop(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       /* mark event as deactivated and stopped */
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               WARN_ON_ONCE(pmu->n_active <= 0);
-               pmu->n_active--;
-               if (pmu->n_active == 0)
-                       rapl_stop_hrtimer(pmu);
-
-               list_del(&event->active_entry);
-
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-       }
-
-       /* check if update of sw counter is necessary */
-       if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               rapl_event_update(event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-}
-
-static int rapl_pmu_event_add(struct perf_event *event, int mode)
-{
-       struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu);
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned long flags;
-
-       spin_lock_irqsave(&pmu->lock, flags);
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
-       if (mode & PERF_EF_START)
-               __rapl_pmu_event_start(pmu, event);
-
-       spin_unlock_irqrestore(&pmu->lock, flags);
-
-       return 0;
-}
-
-static void rapl_pmu_event_del(struct perf_event *event, int flags)
-{
-       rapl_pmu_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int rapl_pmu_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config & RAPL_EVENT_MASK;
-       int bit, msr, ret = 0;
-
-       /* only look at RAPL events */
-       if (event->attr.type != rapl_pmu_class.type)
-               return -ENOENT;
-
-       /* check only supported bits are set */
-       if (event->attr.config & ~RAPL_EVENT_MASK)
-               return -EINVAL;
-
-       /*
-        * check event is known (determines counter)
-        */
-       switch (cfg) {
-       case INTEL_RAPL_PP0:
-               bit = RAPL_IDX_PP0_NRG_STAT;
-               msr = MSR_PP0_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_PKG:
-               bit = RAPL_IDX_PKG_NRG_STAT;
-               msr = MSR_PKG_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_RAM:
-               bit = RAPL_IDX_RAM_NRG_STAT;
-               msr = MSR_DRAM_ENERGY_STATUS;
-               break;
-       case INTEL_RAPL_PP1:
-               bit = RAPL_IDX_PP1_NRG_STAT;
-               msr = MSR_PP1_ENERGY_STATUS;
-               break;
-       default:
-               return -EINVAL;
-       }
-       /* check event supported */
-       if (!(rapl_cntr_mask & (1 << bit)))
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       /* must be done before validate_group */
-       event->hw.event_base = msr;
-       event->hw.config = cfg;
-       event->hw.idx = bit;
-
-       return ret;
-}
-
-static void rapl_pmu_event_read(struct perf_event *event)
-{
-       rapl_event_update(event);
-}
-
-static ssize_t rapl_get_attr_cpumask(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
-
-static struct attribute *rapl_pmu_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_attr_group = {
-       .attrs = rapl_pmu_attrs,
-};
-
-RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
-RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
-RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
-RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
-
-RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
-RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
-
-/*
- * we report counts in 2^-32 Joule (~0.23 nJ) increments regardless of the MSR unit
- */
-RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
-RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
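/*
 * Editorial usage note: tools combine these sysfs attributes; for example
 * "perf stat -a -e power/energy-pkg/ sleep 1" multiplies the raw count by
 * the .scale value above (2^-32) and appends the .unit string, so energy
 * is reported directly in Joules.
 */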
-
-static struct attribute *rapl_events_srv_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_cln_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_gpu),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_gpu_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_gpu_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_hsw_attr[] = {
-       EVENT_PTR(rapl_cores),
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_gpu),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_cores_unit),
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_gpu_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_cores_scale),
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_gpu_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute *rapl_events_knl_attr[] = {
-       EVENT_PTR(rapl_pkg),
-       EVENT_PTR(rapl_ram),
-
-       EVENT_PTR(rapl_pkg_unit),
-       EVENT_PTR(rapl_ram_unit),
-
-       EVENT_PTR(rapl_pkg_scale),
-       EVENT_PTR(rapl_ram_scale),
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_events_group = {
-       .name = "events",
-       .attrs = NULL, /* patched at runtime */
-};
-
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
-static struct attribute *rapl_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group rapl_pmu_format_group = {
-       .name = "format",
-       .attrs = rapl_formats_attr,
-};
-
-const struct attribute_group *rapl_attr_groups[] = {
-       &rapl_pmu_attr_group,
-       &rapl_pmu_format_group,
-       &rapl_pmu_events_group,
-       NULL,
-};
-
-static struct pmu rapl_pmu_class = {
-       .attr_groups    = rapl_attr_groups,
-       .task_ctx_nr    = perf_invalid_context, /* system-wide only */
-       .event_init     = rapl_pmu_event_init,
-       .add            = rapl_pmu_event_add, /* must have */
-       .del            = rapl_pmu_event_del, /* must have */
-       .start          = rapl_pmu_event_start,
-       .stop           = rapl_pmu_event_stop,
-       .read           = rapl_pmu_event_read,
-};
-
-static void rapl_cpu_exit(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-       int i, phys_id = topology_physical_package_id(cpu);
-       int target = -1;
-
-       /* find a new cpu on same package */
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-               if (phys_id == topology_physical_package_id(i)) {
-                       target = i;
-                       break;
-               }
-       }
-       /*
-        * clear cpu from the cpumask;
-        * if it was set and some other cpu on the package remains,
-        * then move the designation to that new cpu
-        */
-       if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0)
-               cpumask_set_cpu(target, &rapl_cpu_mask);
-
-       WARN_ON(cpumask_empty(&rapl_cpu_mask));
-       /*
-        * migrate events and context to new cpu
-        */
-       if (target >= 0)
-               perf_pmu_migrate_context(pmu->pmu, cpu, target);
-
-       /* cancel overflow polling timer for CPU */
-       rapl_stop_hrtimer(pmu);
-}
-
-static void rapl_cpu_init(int cpu)
-{
-       int i, phys_id = topology_physical_package_id(cpu);
-
-       /* check if phys_id is already covered */
-       for_each_cpu(i, &rapl_cpu_mask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return;
-       }
-       /* was not found, so add it */
-       cpumask_set_cpu(cpu, &rapl_cpu_mask);
-}
-
-static __init void rapl_hsw_server_quirk(void)
-{
-       /*
-        * The DRAM domain on HSW servers has a fixed energy unit which can
-        * differ from the unit in the power unit MSR. See:
-        * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
-        * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
-        */
-       rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16;
-}
-
-static int rapl_cpu_prepare(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-       int phys_id = topology_physical_package_id(cpu);
-       u64 ms;
-
-       if (pmu)
-               return 0;
-
-       if (phys_id < 0)
-               return -1;
-
-       pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
-       if (!pmu)
-               return -1;
-       spin_lock_init(&pmu->lock);
-
-       INIT_LIST_HEAD(&pmu->active_list);
-
-       pmu->pmu = &rapl_pmu_class;
-
-       /*
-        * use a reference of 200W for scaling the timeout so that counter
-        * overflows are not missed: 200W = 200 Joules/sec.
-        * divide the interval by 2 to avoid lockstep (2 * 100).
-        * if the hw unit is 32, then we use 2 ms: 1/200/2.
-        * (a worked example follows this function)
-        */
-       if (rapl_hw_unit[0] < 32)
-               ms = (1000 / (2 * 100)) * (1ULL << (32 - rapl_hw_unit[0] - 1));
-       else
-               ms = 2;
-
-       pmu->timer_interval = ms_to_ktime(ms);
-
-       rapl_hrtimer_init(pmu);
-
-       /* set RAPL pmu for this cpu for now */
-       per_cpu(rapl_pmu, cpu) = pmu;
-       per_cpu(rapl_pmu_to_free, cpu) = NULL;
-
-       return 0;
-}
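/*
 * Worked example (editorial): with the common SandyBridge-style energy
 * unit of 2^-16 J (rapl_hw_unit[0] == 16), the 32-bit counter overflows
 * after 2^32 * 2^-16 J = 65536 J, which takes ~328 s at the 200 W
 * reference; the formula above yields ms = 5 * 2^15 = 163840 ms, i.e.
 * half of that.
 */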
-
-static void rapl_cpu_kfree(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu);
-
-       kfree(pmu);
-
-       per_cpu(rapl_pmu_to_free, cpu) = NULL;
-}
-
-static int rapl_cpu_dying(int cpu)
-{
-       struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
-
-       if (!pmu)
-               return 0;
-
-       per_cpu(rapl_pmu, cpu) = NULL;
-
-       per_cpu(rapl_pmu_to_free, cpu) = pmu;
-
-       return 0;
-}
-
-static int rapl_cpu_notifier(struct notifier_block *self,
-                            unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               rapl_cpu_prepare(cpu);
-               break;
-       case CPU_STARTING:
-               rapl_cpu_init(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               rapl_cpu_dying(cpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               rapl_cpu_kfree(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               rapl_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static int rapl_check_hw_unit(void)
-{
-       u64 msr_rapl_power_unit_bits;
-       int i;
-
-       /* protect rdmsrl() to handle virtualization */
-       if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
-               return -1;
-       for (i = 0; i < NR_RAPL_DOMAINS; i++)
-               rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
-
-       return 0;
-}
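/*
 * Editorial note: bits 12:8 of MSR_RAPL_POWER_UNIT hold the Energy Status
 * Units field, hence the (>> 8) & 0x1F above. A value of 16 (the
 * SandyBridge default mentioned in the header comment) means each raw
 * count is 2^-16 J.
 */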
-
-static const struct x86_cpu_id rapl_cpu_match[] = {
-       [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
-       [1] = {},
-};
-
-static int __init rapl_pmu_init(void)
-{
-       struct rapl_pmu *pmu;
-       int cpu, ret;
-       struct x86_pmu_quirk *quirk;
-       int i;
-
-       /*
-        * check for Intel processor family 6
-        */
-       if (!x86_match_cpu(rapl_cpu_match))
-               return 0;
-
-       /* check supported CPU */
-       switch (boot_cpu_data.x86_model) {
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-               rapl_cntr_mask = RAPL_IDX_CLN;
-               rapl_pmu_events_group.attrs = rapl_events_cln_attr;
-               break;
-       case 63: /* Haswell-Server */
-               rapl_add_quirk(rapl_hsw_server_quirk);
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell-Celeron */
-       case 61: /* Broadwell */
-               rapl_cntr_mask = RAPL_IDX_HSW;
-               rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
-               break;
-       case 45: /* Sandy Bridge-EP */
-       case 62: /* IvyTown */
-               rapl_cntr_mask = RAPL_IDX_SRV;
-               rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-               break;
-       case 87: /* Knights Landing */
-               rapl_add_quirk(rapl_hsw_server_quirk);
-               rapl_cntr_mask = RAPL_IDX_KNL;
-               rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-               break;
-
-       default:
-               /* unsupported */
-               return 0;
-       }
-       ret = rapl_check_hw_unit();
-       if (ret)
-               return ret;
-
-       /* run cpu model quirks */
-       for (quirk = rapl_quirks; quirk; quirk = quirk->next)
-               quirk->func();
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               ret = rapl_cpu_prepare(cpu);
-               if (ret)
-                       goto out;
-               rapl_cpu_init(cpu);
-       }
-
-       __perf_cpu_notifier(rapl_cpu_notifier);
-
-       ret = perf_pmu_register(&rapl_pmu_class, "power", -1);
-       if (WARN_ON(ret)) {
-               pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret);
-               cpu_notifier_register_done();
-               return -1;
-       }
-
-       pmu = __this_cpu_read(rapl_pmu);
-
-       pr_info("RAPL PMU detected,"
-               " API unit is 2^-32 Joules,"
-               " %d fixed counters,"
-               " %llu ms ovfl timer\n",
-               hweight32(rapl_cntr_mask),
-               ktime_to_ms(pmu->timer_interval));
-       for (i = 0; i < NR_RAPL_DOMAINS; i++) {
-               if (rapl_cntr_mask & (1 << i)) {
-                       pr_info("hw unit of domain %s 2^-%d Joules\n",
-                               rapl_domain_names[i], rapl_hw_unit[i]);
-               }
-       }
-out:
-       cpu_notifier_register_done();
-
-       return 0;
-}
-device_initcall(rapl_pmu_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
deleted file mode 100644 (file)
index 3bf41d4..0000000
+++ /dev/null
@@ -1,1401 +0,0 @@
-#include "perf_event_intel_uncore.h"
-
-static struct intel_uncore_type *empty_uncore[] = { NULL, };
-struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
-struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
-
-static bool pcidrv_registered;
-struct pci_driver *uncore_pci_driver;
-/* pci bus to socket mapping */
-DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
-struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
-struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-
-static DEFINE_RAW_SPINLOCK(uncore_box_lock);
-/* mask of cpus that collect uncore events */
-static cpumask_t uncore_cpu_mask;
-
-/* constraint for the fixed counter */
-static struct event_constraint uncore_constraint_fixed =
-       EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
-struct event_constraint uncore_constraint_empty =
-       EVENT_CONSTRAINT(0, 0, 0);
-
-int uncore_pcibus_to_physid(struct pci_bus *bus)
-{
-       struct pci2phy_map *map;
-       int phys_id = -1;
-
-       raw_spin_lock(&pci2phy_map_lock);
-       list_for_each_entry(map, &pci2phy_map_head, list) {
-               if (map->segment == pci_domain_nr(bus)) {
-                       phys_id = map->pbus_to_physid[bus->number];
-                       break;
-               }
-       }
-       raw_spin_unlock(&pci2phy_map_lock);
-
-       return phys_id;
-}
-
-struct pci2phy_map *__find_pci2phy_map(int segment)
-{
-       struct pci2phy_map *map, *alloc = NULL;
-       int i;
-
-       lockdep_assert_held(&pci2phy_map_lock);
-
-lookup:
-       list_for_each_entry(map, &pci2phy_map_head, list) {
-               if (map->segment == segment)
-                       goto end;
-       }
-
-       if (!alloc) {
-               raw_spin_unlock(&pci2phy_map_lock);
-               alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
-               raw_spin_lock(&pci2phy_map_lock);
-
-               if (!alloc)
-                       return NULL;
-
-               goto lookup;
-       }
-
-       map = alloc;
-       alloc = NULL;
-       map->segment = segment;
-       for (i = 0; i < 256; i++)
-               map->pbus_to_physid[i] = -1;
-       list_add_tail(&map->list, &pci2phy_map_head);
-
-end:
-       kfree(alloc);
-       return map;
-}
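/*
 * Editorial note on the retry pattern above: kmalloc(GFP_KERNEL) may
 * sleep, so the raw spinlock is dropped around the allocation and the
 * lookup is retried afterwards, since another CPU may have inserted a
 * map for the same segment in the meantime; an unused allocation is
 * then freed at the "end" label.
 */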
-
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf)
-{
-       struct uncore_event_desc *event =
-               container_of(attr, struct uncore_event_desc, attr);
-       return sprintf(buf, "%s", event->config);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
-       return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
-{
-       struct intel_uncore_box *box;
-
-       box = *per_cpu_ptr(pmu->box, cpu);
-       if (box)
-               return box;
-
-       raw_spin_lock(&uncore_box_lock);
-       /* Recheck in lock to handle races. */
-       if (*per_cpu_ptr(pmu->box, cpu))
-               goto out;
-       list_for_each_entry(box, &pmu->box_list, list) {
-               if (box->phys_id == topology_physical_package_id(cpu)) {
-                       atomic_inc(&box->refcnt);
-                       *per_cpu_ptr(pmu->box, cpu) = box;
-                       break;
-               }
-       }
-out:
-       raw_spin_unlock(&uncore_box_lock);
-
-       return *per_cpu_ptr(pmu->box, cpu);
-}
-
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
-       /*
-        * perf core schedules events on the basis of cpu; uncore events are
-        * collected by one of the cpus inside a physical package.
-        */
-       return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
-}
-
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       u64 count;
-
-       rdmsrl(event->hw.event_base, count);
-
-       return count;
-}
-
-/*
- * generic get constraint function for shared match/mask registers.
- */
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       unsigned long flags;
-       bool ok = false;
-
-       /*
-        * reg->alloc can be set due to existing state, so for fake box we
-        * need to ignore this, otherwise we might fail to allocate proper
-        * fake state for this extra reg constraint.
-        */
-       if (reg1->idx == EXTRA_REG_NONE ||
-           (!uncore_box_is_fake(box) && reg1->alloc))
-               return NULL;
-
-       er = &box->shared_regs[reg1->idx];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (!atomic_read(&er->ref) ||
-           (er->config1 == reg1->config && er->config2 == reg2->config)) {
-               atomic_inc(&er->ref);
-               er->config1 = reg1->config;
-               er->config2 = reg2->config;
-               ok = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (ok) {
-               if (!uncore_box_is_fake(box))
-                       reg1->alloc = 1;
-               return NULL;
-       }
-
-       return &uncore_constraint_empty;
-}
-
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-
-       /*
-        * Only put the constraint if the extra reg was actually allocated.
-        * This also takes care of events which do not use an extra shared reg.
-        *
-        * Also, if this is a fake box we shouldn't touch any event state
-        * (reg->alloc) and we don't care about leaving inconsistent box
-        * state either since it will be thrown out.
-        */
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       er = &box->shared_regs[reg1->idx];
-       atomic_dec(&er->ref);
-       reg1->alloc = 0;
-}
-
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       u64 config;
-
-       er = &box->shared_regs[idx];
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       config = er->config;
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       return config;
-}
-
-static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       hwc->idx = idx;
-       hwc->last_tag = ++box->tags[idx];
-
-       if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
-               hwc->event_base = uncore_fixed_ctr(box);
-               hwc->config_base = uncore_fixed_ctl(box);
-               return;
-       }
-
-       hwc->config_base = uncore_event_ctl(box, hwc->idx);
-       hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
-}
-
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
-{
-       u64 prev_count, new_count, delta;
-       int shift;
-
-       if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
-               shift = 64 - uncore_fixed_ctr_bits(box);
-       else
-               shift = 64 - uncore_perf_ctr_bits(box);
-
-       /* the hrtimer might modify the previous event value */
-again:
-       prev_count = local64_read(&event->hw.prev_count);
-       new_count = uncore_read_counter(box, event);
-       if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
-               goto again;
-
-       delta = (new_count << shift) - (prev_count << shift);
-       delta >>= shift;
-
-       local64_add(delta, &event->count);
-}
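/*
 * Worked example (editorial): for a 48-bit free running counter the shift
 * is 64 - 48 = 16. If prev_count = 0xffffffffffff and the counter wraps
 * to new_count = 0x5, then (new << 16) - (prev << 16) = 0x60000 and
 * shifting right by 16 yields a delta of 6, the true advance across the
 * wrap.
 */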
-
-/*
- * The overflow interrupt is unavailable for SandyBridge-EP and is broken
- * for SandyBridge. So we use hrtimer to periodically poll the counter
- * to avoid overflow.
- */
-static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
-{
-       struct intel_uncore_box *box;
-       struct perf_event *event;
-       unsigned long flags;
-       int bit;
-
-       box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
-       if (!box->n_active || box->cpu != smp_processor_id())
-               return HRTIMER_NORESTART;
-       /*
-        * disable local interrupts to prevent uncore_pmu_event_start/stop
-        * from interrupting the update process
-        */
-       local_irq_save(flags);
-
-       /*
-        * handle boxes with an active event list as opposed to active
-        * counters
-        */
-       list_for_each_entry(event, &box->active_list, active_entry) {
-               uncore_perf_event_update(box, event);
-       }
-
-       for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
-               uncore_perf_event_update(box, box->events[bit]);
-
-       local_irq_restore(flags);
-
-       hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
-       return HRTIMER_RESTART;
-}
-
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
-                     HRTIMER_MODE_REL_PINNED);
-}
-
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_cancel(&box->hrtimer);
-}
-
-static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
-{
-       hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       box->hrtimer.function = uncore_pmu_hrtimer;
-}
-
-static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
-{
-       struct intel_uncore_box *box;
-       int i, size;
-
-       size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
-
-       box = kzalloc_node(size, GFP_KERNEL, node);
-       if (!box)
-               return NULL;
-
-       for (i = 0; i < type->num_shared_regs; i++)
-               raw_spin_lock_init(&box->shared_regs[i].lock);
-
-       uncore_pmu_init_hrtimer(box);
-       atomic_set(&box->refcnt, 1);
-       box->cpu = -1;
-       box->phys_id = -1;
-
-       /* set default hrtimer timeout */
-       box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
-
-       INIT_LIST_HEAD(&box->active_list);
-
-       return box;
-}
-
-/*
- * Using uncore_pmu_event_init pmu event_init callback
- * as a detection point for uncore events.
- */
-static int uncore_pmu_event_init(struct perf_event *event);
-
-static bool is_uncore_event(struct perf_event *event)
-{
-       return event->pmu->event_init == uncore_pmu_event_init;
-}
-
-static int
-uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
-{
-       struct perf_event *event;
-       int n, max_count;
-
-       max_count = box->pmu->type->num_counters;
-       if (box->pmu->type->fixed_ctl)
-               max_count++;
-
-       if (box->n_events >= max_count)
-               return -EINVAL;
-
-       n = box->n_events;
-
-       if (is_uncore_event(leader)) {
-               box->event_list[n] = leader;
-               n++;
-       }
-
-       if (!dogrp)
-               return n;
-
-       list_for_each_entry(event, &leader->sibling_list, group_entry) {
-               if (!is_uncore_event(event) ||
-                   event->state <= PERF_EVENT_STATE_OFF)
-                       continue;
-
-               if (n >= max_count)
-                       return -EINVAL;
-
-               box->event_list[n] = event;
-               n++;
-       }
-       return n;
-}
-
-static struct event_constraint *
-uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_type *type = box->pmu->type;
-       struct event_constraint *c;
-
-       if (type->ops->get_constraint) {
-               c = type->ops->get_constraint(box, event);
-               if (c)
-                       return c;
-       }
-
-       if (event->attr.config == UNCORE_FIXED_EVENT)
-               return &uncore_constraint_fixed;
-
-       if (type->constraints) {
-               for_each_event_constraint(c, type->constraints) {
-                       if ((event->hw.config & c->cmask) == c->code)
-                               return c;
-               }
-       }
-
-       return &type->unconstrainted;
-}
-
-static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       if (box->pmu->type->ops->put_constraint)
-               box->pmu->type->ops->put_constraint(box, event);
-}
-
-static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
-{
-       unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-       struct event_constraint *c;
-       int i, wmin, wmax, ret = 0;
-       struct hw_perf_event *hwc;
-
-       bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
-
-       for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
-               c = uncore_get_event_constraint(box, box->event_list[i]);
-               box->event_constraint[i] = c;
-               wmin = min(wmin, c->weight);
-               wmax = max(wmax, c->weight);
-       }
-
-       /* fastpath, try to reuse previous register */
-       for (i = 0; i < n; i++) {
-               hwc = &box->event_list[i]->hw;
-               c = box->event_constraint[i];
-
-               /* never assigned */
-               if (hwc->idx == -1)
-                       break;
-
-               /* constraint still honored */
-               if (!test_bit(hwc->idx, c->idxmsk))
-                       break;
-
-               /* not already used */
-               if (test_bit(hwc->idx, used_mask))
-                       break;
-
-               __set_bit(hwc->idx, used_mask);
-               if (assign)
-                       assign[i] = hwc->idx;
-       }
-       /* slow path */
-       if (i != n)
-               ret = perf_assign_events(box->event_constraint, n,
-                                        wmin, wmax, n, assign);
-
-       if (!assign || ret) {
-               for (i = 0; i < n; i++)
-                       uncore_put_event_constraint(box, box->event_list[i]);
-       }
-       return ret ? -EINVAL : 0;
-}
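/*
 * Editorial note: the loop above is a fast path that keeps every event on
 * the counter it already occupies as long as its constraint and the
 * used_mask still allow it; only when some event cannot stay in place
 * does perf_assign_events() run the generic weight-ordered assignment
 * over all n events.
 */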
-
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int idx = event->hw.idx;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
-               return;
-
-       event->hw.state = 0;
-       box->events[idx] = event;
-       box->n_active++;
-       __set_bit(idx, box->active_mask);
-
-       local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
-       uncore_enable_event(box, event);
-
-       if (box->n_active == 1) {
-               uncore_enable_box(box);
-               uncore_pmu_start_hrtimer(box);
-       }
-}
-
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
-               uncore_disable_event(box, event);
-               box->n_active--;
-               box->events[hwc->idx] = NULL;
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-
-               if (box->n_active == 0) {
-                       uncore_disable_box(box);
-                       uncore_pmu_cancel_hrtimer(box);
-               }
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               uncore_perf_event_update(box, event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-       int assign[UNCORE_PMC_IDX_MAX];
-       int i, n, ret;
-
-       if (!box)
-               return -ENODEV;
-
-       ret = n = uncore_collect_events(box, event, false);
-       if (ret < 0)
-               return ret;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       ret = uncore_assign_events(box, assign, n);
-       if (ret)
-               return ret;
-
-       /* save events moving to new counters */
-       for (i = 0; i < box->n_events; i++) {
-               event = box->event_list[i];
-               hwc = &event->hw;
-
-               if (hwc->idx == assign[i] &&
-                       hwc->last_tag == box->tags[assign[i]])
-                       continue;
-               /*
-                * Ensure we don't accidentally enable a stopped
-                * counter simply because we rescheduled.
-                */
-               if (hwc->state & PERF_HES_STOPPED)
-                       hwc->state |= PERF_HES_ARCH;
-
-               uncore_pmu_event_stop(event, PERF_EF_UPDATE);
-       }
-
-       /* reprogram moved events into new counters */
-       for (i = 0; i < n; i++) {
-               event = box->event_list[i];
-               hwc = &event->hw;
-
-               if (hwc->idx != assign[i] ||
-                       hwc->last_tag != box->tags[assign[i]])
-                       uncore_assign_hw_event(box, event, assign[i]);
-               else if (i < box->n_events)
-                       continue;
-
-               if (hwc->state & PERF_HES_ARCH)
-                       continue;
-
-               uncore_pmu_event_start(event, 0);
-       }
-       box->n_events = n;
-
-       return 0;
-}
-
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int i;
-
-       uncore_pmu_event_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < box->n_events; i++) {
-               if (event == box->event_list[i]) {
-                       uncore_put_event_constraint(box, event);
-
-                       while (++i < box->n_events)
-                               box->event_list[i - 1] = box->event_list[i];
-
-                       --box->n_events;
-                       break;
-               }
-       }
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-}
-
-void uncore_pmu_event_read(struct perf_event *event)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       uncore_perf_event_update(box, event);
-}
-
-/*
- * validation ensures the group can be loaded onto the
- * PMU if it was the only group available.
- */
-static int uncore_validate_group(struct intel_uncore_pmu *pmu,
-                               struct perf_event *event)
-{
-       struct perf_event *leader = event->group_leader;
-       struct intel_uncore_box *fake_box;
-       int ret = -EINVAL, n;
-
-       fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
-       if (!fake_box)
-               return -ENOMEM;
-
-       fake_box->pmu = pmu;
-       /*
-        * the event is not yet connected with its
-        * siblings, therefore we must first collect
-        * the existing siblings, then add the new event
-        * before we can simulate the scheduling
-        */
-       n = uncore_collect_events(fake_box, leader, true);
-       if (n < 0)
-               goto out;
-
-       fake_box->n_events = n;
-       n = uncore_collect_events(fake_box, event, false);
-       if (n < 0)
-               goto out;
-
-       fake_box->n_events = n;
-
-       ret = uncore_assign_events(fake_box, NULL, n);
-out:
-       kfree(fake_box);
-       return ret;
-}
-
-static int uncore_pmu_event_init(struct perf_event *event)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct hw_perf_event *hwc = &event->hw;
-       int ret;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       pmu = uncore_event_to_pmu(event);
-       /* no device found for this pmu */
-       if (pmu->func_id < 0)
-               return -ENOENT;
-
-       /*
-        * The uncore PMU measures at all privilege levels all the time,
-        * so it doesn't make sense to specify any exclude bits.
-        */
-       if (event->attr.exclude_user || event->attr.exclude_kernel ||
-                       event->attr.exclude_hv || event->attr.exclude_idle)
-               return -EINVAL;
-
-       /* Sampling not supported yet */
-       if (hwc->sample_period)
-               return -EINVAL;
-
-       /*
-        * Place all uncore events for a particular physical package
-        * onto a single cpu
-        */
-       if (event->cpu < 0)
-               return -EINVAL;
-       box = uncore_pmu_to_box(pmu, event->cpu);
-       if (!box || box->cpu < 0)
-               return -EINVAL;
-       event->cpu = box->cpu;
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-
-       if (event->attr.config == UNCORE_FIXED_EVENT) {
-               /* no fixed counter */
-               if (!pmu->type->fixed_ctl)
-                       return -EINVAL;
-               /*
-                * if there is only one fixed counter, only the first pmu
-                * can access the fixed counter
-                */
-               if (pmu->type->single_fixed && pmu->pmu_idx > 0)
-                       return -EINVAL;
-
-               /* fixed counters have event field hardcoded to zero */
-               hwc->config = 0ULL;
-       } else {
-               hwc->config = event->attr.config & pmu->type->event_mask;
-               if (pmu->type->ops->hw_config) {
-                       ret = pmu->type->ops->hw_config(box, event);
-                       if (ret)
-                               return ret;
-               }
-       }
-
-       if (event->group_leader != event)
-               ret = uncore_validate_group(pmu, event);
-       else
-               ret = 0;
-
-       return ret;
-}
-
-static ssize_t uncore_get_attr_cpumask(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
-}
-
-static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
-
-static struct attribute *uncore_pmu_attrs[] = {
-       &dev_attr_cpumask.attr,
-       NULL,
-};
-
-static struct attribute_group uncore_pmu_attr_group = {
-       .attrs = uncore_pmu_attrs,
-};
-
-static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
-{
-       int ret;
-
-       if (!pmu->type->pmu) {
-               pmu->pmu = (struct pmu) {
-                       .attr_groups    = pmu->type->attr_groups,
-                       .task_ctx_nr    = perf_invalid_context,
-                       .event_init     = uncore_pmu_event_init,
-                       .add            = uncore_pmu_event_add,
-                       .del            = uncore_pmu_event_del,
-                       .start          = uncore_pmu_event_start,
-                       .stop           = uncore_pmu_event_stop,
-                       .read           = uncore_pmu_event_read,
-               };
-       } else {
-               pmu->pmu = *pmu->type->pmu;
-               pmu->pmu.attr_groups = pmu->type->attr_groups;
-       }
-
-       if (pmu->type->num_boxes == 1) {
-               if (strlen(pmu->type->name) > 0)
-                       sprintf(pmu->name, "uncore_%s", pmu->type->name);
-               else
-                       sprintf(pmu->name, "uncore");
-       } else {
-               sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
-                       pmu->pmu_idx);
-       }
-
-       ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
-       return ret;
-}
-
-static void __init uncore_type_exit(struct intel_uncore_type *type)
-{
-       int i;
-
-       for (i = 0; i < type->num_boxes; i++)
-               free_percpu(type->pmus[i].box);
-       kfree(type->pmus);
-       type->pmus = NULL;
-       kfree(type->events_group);
-       type->events_group = NULL;
-}
-
-static void __init uncore_types_exit(struct intel_uncore_type **types)
-{
-       int i;
-       for (i = 0; types[i]; i++)
-               uncore_type_exit(types[i]);
-}
-
-static int __init uncore_type_init(struct intel_uncore_type *type)
-{
-       struct intel_uncore_pmu *pmus;
-       struct attribute_group *attr_group;
-       struct attribute **attrs;
-       int i, j;
-
-       pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
-       if (!pmus)
-               return -ENOMEM;
-
-       type->pmus = pmus;
-
-       type->unconstrainted = (struct event_constraint)
-               __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
-                               0, type->num_counters, 0, 0);
-
-       for (i = 0; i < type->num_boxes; i++) {
-               pmus[i].func_id = -1;
-               pmus[i].pmu_idx = i;
-               pmus[i].type = type;
-               INIT_LIST_HEAD(&pmus[i].box_list);
-               pmus[i].box = alloc_percpu(struct intel_uncore_box *);
-               if (!pmus[i].box)
-                       goto fail;
-       }
-
-       if (type->event_descs) {
-               i = 0;
-               while (type->event_descs[i].attr.attr.name)
-                       i++;
-
-               attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
-                                       sizeof(*attr_group), GFP_KERNEL);
-               if (!attr_group)
-                       goto fail;
-
-               attrs = (struct attribute **)(attr_group + 1);
-               attr_group->name = "events";
-               attr_group->attrs = attrs;
-
-               for (j = 0; j < i; j++)
-                       attrs[j] = &type->event_descs[j].attr.attr;
-
-               type->events_group = attr_group;
-       }
-
-       type->pmu_group = &uncore_pmu_attr_group;
-       return 0;
-fail:
-       uncore_type_exit(type);
-       return -ENOMEM;
-}
-
-static int __init uncore_types_init(struct intel_uncore_type **types)
-{
-       int i, ret;
-
-       for (i = 0; types[i]; i++) {
-               ret = uncore_type_init(types[i]);
-               if (ret)
-                       goto fail;
-       }
-       return 0;
-fail:
-       while (--i >= 0)
-               uncore_type_exit(types[i]);
-       return ret;
-}
-
-/*
- * add a pci uncore device
- */
-static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct intel_uncore_type *type;
-       int phys_id;
-       bool first_box = false;
-
-       phys_id = uncore_pcibus_to_physid(pdev->bus);
-       if (phys_id < 0)
-               return -ENODEV;
-
-       if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
-               int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
-               uncore_extra_pci_dev[phys_id][idx] = pdev;
-               pci_set_drvdata(pdev, NULL);
-               return 0;
-       }
-
-       type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-       box = uncore_alloc_box(type, NUMA_NO_NODE);
-       if (!box)
-               return -ENOMEM;
-
-       /*
-        * for performance monitoring units with multiple boxes,
-        * each box has a different function id.
-        */
-       pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
-       /* Knights Landing uses a common PCI device ID for multiple instances of
-        * an uncore PMU device type. There is only one entry per device type in
-        * the knl_uncore_pci_ids table despite multiple devices being
-        * present for some device types. Hence the PCI device idx would be
-        * 0 for all devices, so increment the pmu pointer to point to an
-        * unused array element.
-        */
-       if (boot_cpu_data.x86_model == 87)
-               while (pmu->func_id >= 0)
-                       pmu++;
-       if (pmu->func_id < 0)
-               pmu->func_id = pdev->devfn;
-       else
-               WARN_ON_ONCE(pmu->func_id != pdev->devfn);
-
-       box->phys_id = phys_id;
-       box->pci_dev = pdev;
-       box->pmu = pmu;
-       uncore_box_init(box);
-       pci_set_drvdata(pdev, box);
-
-       raw_spin_lock(&uncore_box_lock);
-       if (list_empty(&pmu->box_list))
-               first_box = true;
-       list_add_tail(&box->list, &pmu->box_list);
-       raw_spin_unlock(&uncore_box_lock);
-
-       if (first_box)
-               uncore_pmu_register(pmu);
-       return 0;
-}
-
-static void uncore_pci_remove(struct pci_dev *pdev)
-{
-       struct intel_uncore_box *box = pci_get_drvdata(pdev);
-       struct intel_uncore_pmu *pmu;
-       int i, cpu, phys_id;
-       bool last_box = false;
-
-       phys_id = uncore_pcibus_to_physid(pdev->bus);
-       box = pci_get_drvdata(pdev);
-       if (!box) {
-               for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
-                       if (uncore_extra_pci_dev[phys_id][i] == pdev) {
-                               uncore_extra_pci_dev[phys_id][i] = NULL;
-                               break;
-                       }
-               }
-               WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
-               return;
-       }
-
-       pmu = box->pmu;
-       if (WARN_ON_ONCE(phys_id != box->phys_id))
-               return;
-
-       pci_set_drvdata(pdev, NULL);
-
-       raw_spin_lock(&uncore_box_lock);
-       list_del(&box->list);
-       if (list_empty(&pmu->box_list))
-               last_box = true;
-       raw_spin_unlock(&uncore_box_lock);
-
-       for_each_possible_cpu(cpu) {
-               if (*per_cpu_ptr(pmu->box, cpu) == box) {
-                       *per_cpu_ptr(pmu->box, cpu) = NULL;
-                       atomic_dec(&box->refcnt);
-               }
-       }
-
-       WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
-       kfree(box);
-
-       if (last_box)
-               perf_pmu_unregister(&pmu->pmu);
-}
-
-static int __init uncore_pci_init(void)
-{
-       int ret;
-
-       switch (boot_cpu_data.x86_model) {
-       case 45: /* Sandy Bridge-EP */
-               ret = snbep_uncore_pci_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ret = ivbep_uncore_pci_init();
-               break;
-       case 63: /* Haswell-EP */
-               ret = hswep_uncore_pci_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               ret = bdx_uncore_pci_init();
-               break;
-       case 42: /* Sandy Bridge */
-               ret = snb_uncore_pci_init();
-               break;
-       case 58: /* Ivy Bridge */
-               ret = ivb_uncore_pci_init();
-               break;
-       case 60: /* Haswell */
-       case 69: /* Haswell Celeron */
-               ret = hsw_uncore_pci_init();
-               break;
-       case 61: /* Broadwell */
-               ret = bdw_uncore_pci_init();
-               break;
-       case 87: /* Knights Landing */
-               ret = knl_uncore_pci_init();
-               break;
-       case 94: /* SkyLake */
-               ret = skl_uncore_pci_init();
-               break;
-       default:
-               return 0;
-       }
-
-       if (ret)
-               return ret;
-
-       ret = uncore_types_init(uncore_pci_uncores);
-       if (ret)
-               return ret;
-
-       uncore_pci_driver->probe = uncore_pci_probe;
-       uncore_pci_driver->remove = uncore_pci_remove;
-
-       ret = pci_register_driver(uncore_pci_driver);
-       if (ret == 0)
-               pcidrv_registered = true;
-       else
-               uncore_types_exit(uncore_pci_uncores);
-
-       return ret;
-}
-
-static void __init uncore_pci_exit(void)
-{
-       if (pcidrv_registered) {
-               pcidrv_registered = false;
-               pci_unregister_driver(uncore_pci_driver);
-               uncore_types_exit(uncore_pci_uncores);
-       }
-}
-
-/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
-static LIST_HEAD(boxes_to_free);
-
-static void uncore_kfree_boxes(void)
-{
-       struct intel_uncore_box *box;
-
-       while (!list_empty(&boxes_to_free)) {
-               box = list_entry(boxes_to_free.next,
-                                struct intel_uncore_box, list);
-               list_del(&box->list);
-               kfree(box);
-       }
-}
-
-static void uncore_cpu_dying(int cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       box = *per_cpu_ptr(pmu->box, cpu);
-                       *per_cpu_ptr(pmu->box, cpu) = NULL;
-                       if (box && atomic_dec_and_test(&box->refcnt))
-                               list_add(&box->list, &boxes_to_free);
-               }
-       }
-}
-
-static int uncore_cpu_starting(int cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box, *exist;
-       int i, j, k, phys_id;
-
-       phys_id = topology_physical_package_id(cpu);
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       box = *per_cpu_ptr(pmu->box, cpu);
-                       /* called by uncore_cpu_init? */
-                       if (box && box->phys_id >= 0) {
-                               uncore_box_init(box);
-                               continue;
-                       }
-
-                       for_each_online_cpu(k) {
-                               exist = *per_cpu_ptr(pmu->box, k);
-                               if (exist && exist->phys_id == phys_id) {
-                                       atomic_inc(&exist->refcnt);
-                                       *per_cpu_ptr(pmu->box, cpu) = exist;
-                                       if (box) {
-                                               list_add(&box->list,
-                                                        &boxes_to_free);
-                                               box = NULL;
-                                       }
-                                       break;
-                               }
-                       }
-
-                       if (box) {
-                               box->phys_id = phys_id;
-                               uncore_box_init(box);
-                       }
-               }
-       }
-       return 0;
-}
-
-static int uncore_cpu_prepare(int cpu, int phys_id)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       if (pmu->func_id < 0)
-                               pmu->func_id = j;
-
-                       box = uncore_alloc_box(type, cpu_to_node(cpu));
-                       if (!box)
-                               return -ENOMEM;
-
-                       box->pmu = pmu;
-                       box->phys_id = phys_id;
-                       *per_cpu_ptr(pmu->box, cpu) = box;
-               }
-       }
-       return 0;
-}
-
-static void
-uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
-{
-       struct intel_uncore_type *type;
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       int i, j;
-
-       for (i = 0; uncores[i]; i++) {
-               type = uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       if (old_cpu < 0)
-                               box = uncore_pmu_to_box(pmu, new_cpu);
-                       else
-                               box = uncore_pmu_to_box(pmu, old_cpu);
-                       if (!box)
-                               continue;
-
-                       if (old_cpu < 0) {
-                               WARN_ON_ONCE(box->cpu != -1);
-                               box->cpu = new_cpu;
-                               continue;
-                       }
-
-                       WARN_ON_ONCE(box->cpu != old_cpu);
-                       if (new_cpu >= 0) {
-                               uncore_pmu_cancel_hrtimer(box);
-                               perf_pmu_migrate_context(&pmu->pmu,
-                                               old_cpu, new_cpu);
-                               box->cpu = new_cpu;
-                       } else {
-                               box->cpu = -1;
-                       }
-               }
-       }
-}
-
-static void uncore_event_exit_cpu(int cpu)
-{
-       int i, phys_id, target;
-
-       /* if the exiting cpu is used for collecting uncore events */
-       if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-               return;
-
-       /* find a new cpu to collect uncore events */
-       phys_id = topology_physical_package_id(cpu);
-       target = -1;
-       for_each_online_cpu(i) {
-               if (i == cpu)
-                       continue;
-               if (phys_id == topology_physical_package_id(i)) {
-                       target = i;
-                       break;
-               }
-       }
-
-       /* migrate uncore events to the new cpu */
-       if (target >= 0)
-               cpumask_set_cpu(target, &uncore_cpu_mask);
-
-       uncore_change_context(uncore_msr_uncores, cpu, target);
-       uncore_change_context(uncore_pci_uncores, cpu, target);
-}
-
-static void uncore_event_init_cpu(int cpu)
-{
-       int i, phys_id;
-
-       phys_id = topology_physical_package_id(cpu);
-       for_each_cpu(i, &uncore_cpu_mask) {
-               if (phys_id == topology_physical_package_id(i))
-                       return;
-       }
-
-       cpumask_set_cpu(cpu, &uncore_cpu_mask);
-
-       uncore_change_context(uncore_msr_uncores, -1, cpu);
-       uncore_change_context(uncore_pci_uncores, -1, cpu);
-}
-
-static int uncore_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (long)hcpu;
-
-       /* allocate/free data structure for uncore box */
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               uncore_cpu_prepare(cpu, -1);
-               break;
-       case CPU_STARTING:
-               uncore_cpu_starting(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               uncore_cpu_dying(cpu);
-               break;
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               uncore_kfree_boxes();
-               break;
-       default:
-               break;
-       }
-
-       /* select the cpu that collects uncore events */
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_FAILED:
-       case CPU_STARTING:
-               uncore_event_init_cpu(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uncore_event_exit_cpu(cpu);
-               break;
-       default:
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block uncore_cpu_nb = {
-       .notifier_call  = uncore_cpu_notifier,
-       /*
-        * to migrate uncore events, our notifier should be executed
-        * before perf core's notifier.
-        */
-       .priority       = CPU_PRI_PERF + 1,
-};
-
-static void __init uncore_cpu_setup(void *dummy)
-{
-       uncore_cpu_starting(smp_processor_id());
-}
-
-static int __init uncore_cpu_init(void)
-{
-       int ret;
-
-       switch (boot_cpu_data.x86_model) {
-       case 26: /* Nehalem */
-       case 30:
-       case 37: /* Westmere */
-       case 44:
-               nhm_uncore_cpu_init();
-               break;
-       case 42: /* Sandy Bridge */
-       case 58: /* Ivy Bridge */
-       case 60: /* Haswell */
-       case 69: /* Haswell */
-       case 70: /* Haswell */
-       case 61: /* Broadwell */
-       case 71: /* Broadwell */
-               snb_uncore_cpu_init();
-               break;
-       case 45: /* Sandy Bridge-EP */
-               snbep_uncore_cpu_init();
-               break;
-       case 46: /* Nehalem-EX */
-       case 47: /* Westmere-EX aka. Xeon E7 */
-               nhmex_uncore_cpu_init();
-               break;
-       case 62: /* Ivy Bridge-EP */
-               ivbep_uncore_cpu_init();
-               break;
-       case 63: /* Haswell-EP */
-               hswep_uncore_cpu_init();
-               break;
-       case 79: /* BDX-EP */
-       case 86: /* BDX-DE */
-               bdx_uncore_cpu_init();
-               break;
-       case 87: /* Knights Landing */
-               knl_uncore_cpu_init();
-               break;
-       default:
-               return 0;
-       }
-
-       ret = uncore_types_init(uncore_msr_uncores);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static int __init uncore_pmus_register(void)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_type *type;
-       int i, j;
-
-       for (i = 0; uncore_msr_uncores[i]; i++) {
-               type = uncore_msr_uncores[i];
-               for (j = 0; j < type->num_boxes; j++) {
-                       pmu = &type->pmus[j];
-                       uncore_pmu_register(pmu);
-               }
-       }
-
-       return 0;
-}
-
-static void __init uncore_cpumask_init(void)
-{
-       int cpu;
-
-       /*
-        * only invoke once from msr or pci init code
-        */
-       if (!cpumask_empty(&uncore_cpu_mask))
-               return;
-
-       cpu_notifier_register_begin();
-
-       for_each_online_cpu(cpu) {
-               int i, phys_id = topology_physical_package_id(cpu);
-
-               for_each_cpu(i, &uncore_cpu_mask) {
-                       if (phys_id == topology_physical_package_id(i)) {
-                               phys_id = -1;
-                               break;
-                       }
-               }
-               if (phys_id < 0)
-                       continue;
-
-               uncore_cpu_prepare(cpu, phys_id);
-               uncore_event_init_cpu(cpu);
-       }
-       on_each_cpu(uncore_cpu_setup, NULL, 1);
-
-       __register_cpu_notifier(&uncore_cpu_nb);
-
-       cpu_notifier_register_done();
-}
-
-
-static int __init intel_uncore_init(void)
-{
-       int ret;
-
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
-               return -ENODEV;
-
-       if (cpu_has_hypervisor)
-               return -ENODEV;
-
-       ret = uncore_pci_init();
-       if (ret)
-               goto fail;
-       ret = uncore_cpu_init();
-       if (ret) {
-               uncore_pci_exit();
-               goto fail;
-       }
-       uncore_cpumask_init();
-
-       uncore_pmus_register();
-       return 0;
-fail:
-       return ret;
-}
-device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
deleted file mode 100644 (file)
index a7086b8..0000000
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/perf_event.h>
-#include "perf_event.h"
-
-#define UNCORE_PMU_NAME_LEN            32
-#define UNCORE_PMU_HRTIMER_INTERVAL    (60LL * NSEC_PER_SEC)
-#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
-
-#define UNCORE_FIXED_EVENT             0xff
-#define UNCORE_PMC_IDX_MAX_GENERIC     8
-#define UNCORE_PMC_IDX_FIXED           UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX             (UNCORE_PMC_IDX_FIXED + 1)
-
-#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
-#define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
-#define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)
-#define UNCORE_EXTRA_PCI_DEV           0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX       3
-
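For orientation, the three UNCORE_PCI_DEV_* macros above simply pack a (type, index) pair into the PCI driver_data value and unpack it again; a minimal stand-alone sketch, using arbitrary example values:

#include <assert.h>

#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
#define UNCORE_PCI_DEV_TYPE(data)      ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)       (data & 0xff)

int main(void)
{
        unsigned long data = UNCORE_PCI_DEV_DATA(2, 5); /* 0x205 */

        assert(UNCORE_PCI_DEV_TYPE(data) == 2);
        assert(UNCORE_PCI_DEV_IDX(data) == 5);
        return 0;
}
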
-/* support up to 8 sockets */
-#define UNCORE_SOCKET_MAX              8
-
-#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
-
-struct intel_uncore_ops;
-struct intel_uncore_pmu;
-struct intel_uncore_box;
-struct uncore_event_desc;
-
-struct intel_uncore_type {
-       const char *name;
-       int num_counters;
-       int num_boxes;
-       int perf_ctr_bits;
-       int fixed_ctr_bits;
-       unsigned perf_ctr;
-       unsigned event_ctl;
-       unsigned event_mask;
-       unsigned fixed_ctr;
-       unsigned fixed_ctl;
-       unsigned box_ctl;
-       unsigned msr_offset;
-       unsigned num_shared_regs:8;
-       unsigned single_fixed:1;
-       unsigned pair_ctr_ctl:1;
-       unsigned *msr_offsets;
-       struct event_constraint unconstrainted;
-       struct event_constraint *constraints;
-       struct intel_uncore_pmu *pmus;
-       struct intel_uncore_ops *ops;
-       struct uncore_event_desc *event_descs;
-       const struct attribute_group *attr_groups[4];
-       struct pmu *pmu; /* for custom pmu ops */
-};
-
-#define pmu_group attr_groups[0]
-#define format_group attr_groups[1]
-#define events_group attr_groups[2]
-
-struct intel_uncore_ops {
-       void (*init_box)(struct intel_uncore_box *);
-       void (*disable_box)(struct intel_uncore_box *);
-       void (*enable_box)(struct intel_uncore_box *);
-       void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
-       void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
-       u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
-       int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
-       struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
-                                                  struct perf_event *);
-       void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
-};
-
-struct intel_uncore_pmu {
-       struct pmu pmu;
-       char name[UNCORE_PMU_NAME_LEN];
-       int pmu_idx;
-       int func_id;
-       struct intel_uncore_type *type;
-       struct intel_uncore_box ** __percpu box;
-       struct list_head box_list;
-};
-
-struct intel_uncore_extra_reg {
-       raw_spinlock_t lock;
-       u64 config, config1, config2;
-       atomic_t ref;
-};
-
-struct intel_uncore_box {
-       int phys_id;
-       int n_active;   /* number of active events */
-       int n_events;
-       int cpu;        /* cpu to collect events */
-       unsigned long flags;
-       atomic_t refcnt;
-       struct perf_event *events[UNCORE_PMC_IDX_MAX];
-       struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
-       struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];
-       unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
-       u64 tags[UNCORE_PMC_IDX_MAX];
-       struct pci_dev *pci_dev;
-       struct intel_uncore_pmu *pmu;
-       u64 hrtimer_duration; /* hrtimer timeout for this box */
-       struct hrtimer hrtimer;
-       struct list_head list;
-       struct list_head active_list;
-       void *io_addr;
-       struct intel_uncore_extra_reg shared_regs[0];
-};
-
-#define UNCORE_BOX_FLAG_INITIATED      0
-
-struct uncore_event_desc {
-       struct kobj_attribute attr;
-       const char *config;
-};
-
-struct pci2phy_map {
-       struct list_head list;
-       int segment;
-       int pbus_to_physid[256];
-};
-
-int uncore_pcibus_to_physid(struct pci_bus *bus);
-struct pci2phy_map *__find_pci2phy_map(int segment);
-
-ssize_t uncore_event_show(struct kobject *kobj,
-                         struct kobj_attribute *attr, char *buf);
-
-#define INTEL_UNCORE_EVENT_DESC(_name, _config)                        \
-{                                                              \
-       .attr   = __ATTR(_name, 0444, uncore_event_show, NULL), \
-       .config = _config,                                      \
-}
-
-#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)                        \
-static ssize_t __uncore_##_var##_show(struct kobject *kobj,            \
-                               struct kobj_attribute *attr,            \
-                               char *page)                             \
-{                                                                      \
-       BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);                     \
-       return sprintf(page, _format "\n");                             \
-}                                                                      \
-static struct kobj_attribute format_attr_##_var =                      \
-       __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
-
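As a sketch of what DEFINE_UNCORE_FORMAT_ATTR generates, expanding the invocation DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7") (used further down in this patch) yields roughly:

static ssize_t __uncore_event_show(struct kobject *kobj,
                                struct kobj_attribute *attr,
                                char *page)
{
        BUILD_BUG_ON(sizeof("config:0-7") >= PAGE_SIZE);
        return sprintf(page, "config:0-7" "\n");
}
static struct kobj_attribute format_attr_event =
        __ATTR(event, 0444, __uncore_event_show, NULL);

Each perf "format" attribute therefore boils down to a read-only sysfs file whose contents are the fixed bit-range string.
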
-static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
-{
-       return box->pmu->type->box_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctl;
-}
-
-static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr;
-}
-
-static inline
-unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       return idx * 4 + box->pmu->type->event_ctl;
-}
-
-static inline
-unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       return idx * 8 + box->pmu->type->perf_ctr;
-}
-
-static inline unsigned uncore_msr_box_offset(struct intel_uncore_box *box)
-{
-       struct intel_uncore_pmu *pmu = box->pmu;
-       return pmu->type->msr_offsets ?
-               pmu->type->msr_offsets[pmu->pmu_idx] :
-               pmu->type->msr_offset * pmu->pmu_idx;
-}
-
-static inline unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
-{
-       if (!box->pmu->type->box_ctl)
-               return 0;
-       return box->pmu->type->box_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
-{
-       if (!box->pmu->type->fixed_ctl)
-               return 0;
-       return box->pmu->type->fixed_ctl + uncore_msr_box_offset(box);
-}
-
-static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       return box->pmu->type->event_ctl +
-               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-               uncore_msr_box_offset(box);
-}
-
-static inline
-unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       return box->pmu->type->perf_ctr +
-               (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-               uncore_msr_box_offset(box);
-}
-
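A worked example of these address helpers, using the NHM-EX Cbox definitions that appear later in this patch (perf_ctr = 0xd11, event_ctl = 0xd10, pair_ctr_ctl = 1, per-box offsets 0x0, 0x80, 0x40, 0xc0, ...):

/*
 * Counter 2 of cbox instance 3 (pmu_idx == 3):
 *
 *   uncore_msr_box_offset() = nhmex_cbox_msr_offsets[3]   = 0xc0
 *   uncore_msr_perf_ctr()   = 0xd11 + 2 * 2 + 0xc0        = 0xdd5
 *   uncore_msr_event_ctl()  = 0xd10 + 2 * 2 + 0xc0        = 0xdd4
 */
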
-static inline
-unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
-{
-       if (box->pci_dev)
-               return uncore_pci_fixed_ctl(box);
-       else
-               return uncore_msr_fixed_ctl(box);
-}
-
-static inline
-unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
-{
-       if (box->pci_dev)
-               return uncore_pci_fixed_ctr(box);
-       else
-               return uncore_msr_fixed_ctr(box);
-}
-
-static inline
-unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
-{
-       if (box->pci_dev)
-               return uncore_pci_event_ctl(box, idx);
-       else
-               return uncore_msr_event_ctl(box, idx);
-}
-
-static inline
-unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
-{
-       if (box->pci_dev)
-               return uncore_pci_perf_ctr(box, idx);
-       else
-               return uncore_msr_perf_ctr(box, idx);
-}
-
-static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
-{
-       return box->pmu->type->perf_ctr_bits;
-}
-
-static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
-{
-       return box->pmu->type->fixed_ctr_bits;
-}
-
-static inline int uncore_num_counters(struct intel_uncore_box *box)
-{
-       return box->pmu->type->num_counters;
-}
-
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->disable_box)
-               box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->type->ops->enable_box)
-               box->pmu->type->ops->enable_box(box);
-}
-
-static inline void uncore_disable_event(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       box->pmu->type->ops->disable_event(box, event);
-}
-
-static inline void uncore_enable_event(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       box->pmu->type->ops->enable_event(box, event);
-}
-
-static inline u64 uncore_read_counter(struct intel_uncore_box *box,
-                               struct perf_event *event)
-{
-       return box->pmu->type->ops->read_counter(box, event);
-}
-
-static inline void uncore_box_init(struct intel_uncore_box *box)
-{
-       if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
-               if (box->pmu->type->ops->init_box)
-                       box->pmu->type->ops->init_box(box);
-       }
-}
-
-static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
-{
-       return (box->phys_id < 0);
-}
-
-struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event);
-struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu);
-struct intel_uncore_box *uncore_event_to_box(struct perf_event *event);
-u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
-void uncore_pmu_event_read(struct perf_event *event);
-void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
-struct event_constraint *
-uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event);
-void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event);
-u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx);
-
-extern struct intel_uncore_type **uncore_msr_uncores;
-extern struct intel_uncore_type **uncore_pci_uncores;
-extern struct pci_driver *uncore_pci_driver;
-extern raw_spinlock_t pci2phy_map_lock;
-extern struct list_head pci2phy_map_head;
-extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
-extern struct event_constraint uncore_constraint_empty;
-
-/* perf_event_intel_uncore_snb.c */
-int snb_uncore_pci_init(void);
-int ivb_uncore_pci_init(void);
-int hsw_uncore_pci_init(void);
-int bdw_uncore_pci_init(void);
-int skl_uncore_pci_init(void);
-void snb_uncore_cpu_init(void);
-void nhm_uncore_cpu_init(void);
-int snb_pci2phy_map_init(int devid);
-
-/* perf_event_intel_uncore_snbep.c */
-int snbep_uncore_pci_init(void);
-void snbep_uncore_cpu_init(void);
-int ivbep_uncore_pci_init(void);
-void ivbep_uncore_cpu_init(void);
-int hswep_uncore_pci_init(void);
-void hswep_uncore_cpu_init(void);
-int bdx_uncore_pci_init(void);
-void bdx_uncore_cpu_init(void);
-int knl_uncore_pci_init(void);
-void knl_uncore_cpu_init(void);
-
-/* perf_event_intel_uncore_nhmex.c */
-void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c
deleted file mode 100644 (file)
index 2749965..0000000
+++ /dev/null
@@ -1,1221 +0,0 @@
-/* Nehalem-EX/Westmere-EX uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* NHM-EX event control */
-#define NHMEX_PMON_CTL_EV_SEL_MASK     0x000000ff
-#define NHMEX_PMON_CTL_UMASK_MASK      0x0000ff00
-#define NHMEX_PMON_CTL_EN_BIT0         (1 << 0)
-#define NHMEX_PMON_CTL_EDGE_DET                (1 << 18)
-#define NHMEX_PMON_CTL_PMI_EN          (1 << 20)
-#define NHMEX_PMON_CTL_EN_BIT22                (1 << 22)
-#define NHMEX_PMON_CTL_INVERT          (1 << 23)
-#define NHMEX_PMON_CTL_TRESH_MASK      0xff000000
-#define NHMEX_PMON_RAW_EVENT_MASK      (NHMEX_PMON_CTL_EV_SEL_MASK | \
-                                        NHMEX_PMON_CTL_UMASK_MASK | \
-                                        NHMEX_PMON_CTL_EDGE_DET | \
-                                        NHMEX_PMON_CTL_INVERT | \
-                                        NHMEX_PMON_CTL_TRESH_MASK)
-
-/* NHM-EX Ubox */
-#define NHMEX_U_MSR_PMON_GLOBAL_CTL            0xc00
-#define NHMEX_U_MSR_PMON_CTR                   0xc11
-#define NHMEX_U_MSR_PMON_EV_SEL                        0xc10
-
-#define NHMEX_U_PMON_GLOBAL_EN                 (1 << 0)
-#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL       0x0000001e
-#define NHMEX_U_PMON_GLOBAL_EN_ALL             (1 << 28)
-#define NHMEX_U_PMON_GLOBAL_RST_ALL            (1 << 29)
-#define NHMEX_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
-
-#define NHMEX_U_PMON_RAW_EVENT_MASK            \
-               (NHMEX_PMON_CTL_EV_SEL_MASK |   \
-                NHMEX_PMON_CTL_EDGE_DET)
-
-/* NHM-EX Cbox */
-#define NHMEX_C0_MSR_PMON_GLOBAL_CTL           0xd00
-#define NHMEX_C0_MSR_PMON_CTR0                 0xd11
-#define NHMEX_C0_MSR_PMON_EV_SEL0              0xd10
-#define NHMEX_C_MSR_OFFSET                     0x20
-
-/* NHM-EX Bbox */
-#define NHMEX_B0_MSR_PMON_GLOBAL_CTL           0xc20
-#define NHMEX_B0_MSR_PMON_CTR0                 0xc31
-#define NHMEX_B0_MSR_PMON_CTL0                 0xc30
-#define NHMEX_B_MSR_OFFSET                     0x40
-#define NHMEX_B0_MSR_MATCH                     0xe45
-#define NHMEX_B0_MSR_MASK                      0xe46
-#define NHMEX_B1_MSR_MATCH                     0xe4d
-#define NHMEX_B1_MSR_MASK                      0xe4e
-
-#define NHMEX_B_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT          1
-#define NHMEX_B_PMON_CTL_EV_SEL_MASK           \
-               (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_B_PMON_CTR_SHIFT         6
-#define NHMEX_B_PMON_CTR_MASK          \
-               (0x3 << NHMEX_B_PMON_CTR_SHIFT)
-#define NHMEX_B_PMON_RAW_EVENT_MASK            \
-               (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
-                NHMEX_B_PMON_CTR_MASK)
-
-/* NHM-EX Sbox */
-#define NHMEX_S0_MSR_PMON_GLOBAL_CTL           0xc40
-#define NHMEX_S0_MSR_PMON_CTR0                 0xc51
-#define NHMEX_S0_MSR_PMON_CTL0                 0xc50
-#define NHMEX_S_MSR_OFFSET                     0x80
-#define NHMEX_S0_MSR_MM_CFG                    0xe48
-#define NHMEX_S0_MSR_MATCH                     0xe49
-#define NHMEX_S0_MSR_MASK                      0xe4a
-#define NHMEX_S1_MSR_MM_CFG                    0xe58
-#define NHMEX_S1_MSR_MATCH                     0xe59
-#define NHMEX_S1_MSR_MASK                      0xe5a
-
-#define NHMEX_S_PMON_MM_CFG_EN                 (0x1ULL << 63)
-#define NHMEX_S_EVENT_TO_R_PROG_EV             0
-
-/* NHM-EX Mbox */
-#define NHMEX_M0_MSR_GLOBAL_CTL                        0xca0
-#define NHMEX_M0_MSR_PMU_DSP                   0xca5
-#define NHMEX_M0_MSR_PMU_ISS                   0xca6
-#define NHMEX_M0_MSR_PMU_MAP                   0xca7
-#define NHMEX_M0_MSR_PMU_MSC_THR               0xca8
-#define NHMEX_M0_MSR_PMU_PGT                   0xca9
-#define NHMEX_M0_MSR_PMU_PLD                   0xcaa
-#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC           0xcab
-#define NHMEX_M0_MSR_PMU_CTL0                  0xcb0
-#define NHMEX_M0_MSR_PMU_CNT0                  0xcb1
-#define NHMEX_M_MSR_OFFSET                     0x40
-#define NHMEX_M0_MSR_PMU_MM_CFG                        0xe54
-#define NHMEX_M1_MSR_PMU_MM_CFG                        0xe5c
-
-#define NHMEX_M_PMON_MM_CFG_EN                 (1ULL << 63)
-#define NHMEX_M_PMON_ADDR_MATCH_MASK           0x3ffffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_MASK            0x7ffffffULL
-#define NHMEX_M_PMON_ADDR_MASK_SHIFT           34
-
-#define NHMEX_M_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_M_PMON_CTL_PMI_EN                        (1 << 1)
-#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT      2
-#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK       \
-       (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT    4
-#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK     \
-       (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
-#define NHMEX_M_PMON_CTL_WRAP_MODE             (1 << 6)
-#define NHMEX_M_PMON_CTL_FLAG_MODE             (1 << 7)
-#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT         9
-#define NHMEX_M_PMON_CTL_INC_SEL_MASK          \
-       (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT    19
-#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK     \
-       (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
-#define NHMEX_M_PMON_RAW_EVENT_MASK                    \
-               (NHMEX_M_PMON_CTL_COUNT_MODE_MASK |     \
-                NHMEX_M_PMON_CTL_STORAGE_MODE_MASK |   \
-                NHMEX_M_PMON_CTL_WRAP_MODE |           \
-                NHMEX_M_PMON_CTL_FLAG_MODE |           \
-                NHMEX_M_PMON_CTL_INC_SEL_MASK |        \
-                NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
-
-#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
-
-#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK          (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
-
-/*
- * Use bits 9~13 to select the event if the 7th bit is not set;
- * otherwise use bits 19~21 to select the event.
- */
-#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
-#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
-                               NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
-                          NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
-                               NHMEX_M_PMON_CTL_FLAG_MODE)
-#define MBOX_INC_SEL_EXTAR_REG(c, r) \
-               EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
-                               MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
-#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
-               EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
-                               MBOX_SET_FLAG_SEL_MASK, \
-                               (u64)-1, NHMEX_M_##r)
-
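To make the INC_SEL/SET_FLAG_SEL encoding concrete, with NHMEX_M_PMON_CTL_INC_SEL_SHIFT == 9, NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT == 19 and NHMEX_M_PMON_CTL_FLAG_MODE == (1 << 7) as defined above:

/*
 * MBOX_INC_SEL(0xd)      == 0xd << 9                == 0x1a00
 * MBOX_SET_FLAG_SEL(0x1) == (0x1 << 19) | (1 << 7)  == 0x80080
 */

The mbox event descriptors later in this file (e.g. inc_sel=0xd) rely on exactly this encoding.
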
-/* NHM-EX Rbox */
-#define NHMEX_R_MSR_GLOBAL_CTL                 0xe00
-#define NHMEX_R_MSR_PMON_CTL0                  0xe10
-#define NHMEX_R_MSR_PMON_CNT0                  0xe11
-#define NHMEX_R_MSR_OFFSET                     0x20
-
-#define NHMEX_R_MSR_PORTN_QLX_CFG(n)           \
-               ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n)                (0xe04 + (n))
-#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n)                (0xe24 + (n))
-#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n)                \
-               (((n) < 4 ? 0 : 0x10) + (n) * 4)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n)   \
-               (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n)    \
-               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n)     \
-               (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n)   \
-               (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n)    \
-               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
-#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n)     \
-               (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
-
-#define NHMEX_R_PMON_CTL_EN                    (1 << 0)
-#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT          1
-#define NHMEX_R_PMON_CTL_EV_SEL_MASK           \
-               (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
-#define NHMEX_R_PMON_CTL_PMI_EN                        (1 << 6)
-#define NHMEX_R_PMON_RAW_EVENT_MASK            NHMEX_R_PMON_CTL_EV_SEL_MASK
-
-/* NHM-EX Wbox */
-#define NHMEX_W_MSR_GLOBAL_CTL                 0xc80
-#define NHMEX_W_MSR_PMON_CNT0                  0xc90
-#define NHMEX_W_MSR_PMON_EVT_SEL0              0xc91
-#define NHMEX_W_MSR_PMON_FIXED_CTR             0x394
-#define NHMEX_W_MSR_PMON_FIXED_CTL             0x395
-
-#define NHMEX_W_PMON_GLOBAL_FIXED_EN           (1ULL << 31)
-
-#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
-                               ((1ULL << (n)) - 1)))
-
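__BITS_VALUE() extracts the i-th n-bit-wide field of x; a quick sanity check of the definition above, with hypothetical values:

#include <assert.h>

#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
                                ((1ULL << (n)) - 1)))

int main(void)
{
        unsigned int packed = 0x0305;   /* two 8-bit fields: 0x03 and 0x05 */

        assert(__BITS_VALUE(packed, 0, 8) == 0x05);
        assert(__BITS_VALUE(packed, 1, 8) == 0x03);
        return 0;
}

The mbox code below uses this helper to keep two 8-bit extra-register indices in reg1->idx and two 16-bit MSR addresses in reg1->reg.
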
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
-DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
-
-static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
-}
-
-static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       u64 config;
-
-       if (msr) {
-               rdmsrl(msr, config);
-               config &= ~((1ULL << uncore_num_counters(box)) - 1);
-               /* WBox has a fixed counter */
-               if (uncore_msr_fixed_ctl(box))
-                       config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
-               wrmsrl(msr, config);
-       }
-}
-
-static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       u64 config;
-
-       if (msr) {
-               rdmsrl(msr, config);
-               config |= (1ULL << uncore_num_counters(box)) - 1;
-               /* WBox has a fixed counter */
-               if (uncore_msr_fixed_ctl(box))
-                       config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
-               wrmsrl(msr, config);
-       }
-}
-
-static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       wrmsrl(event->hw.config_base, 0);
-}
-
-static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
-       else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
-               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-       else
-               wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-#define NHMEX_UNCORE_OPS_COMMON_INIT()                         \
-       .init_box       = nhmex_uncore_msr_init_box,            \
-       .disable_box    = nhmex_uncore_msr_disable_box,         \
-       .enable_box     = nhmex_uncore_msr_enable_box,          \
-       .disable_event  = nhmex_uncore_msr_disable_event,       \
-       .read_counter   = uncore_msr_read_counter
-
-static struct intel_uncore_ops nhmex_uncore_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event   = nhmex_uncore_msr_enable_event,
-};
-
-static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_edge.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_ubox_format_group = {
-       .name           = "format",
-       .attrs          = nhmex_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type nhmex_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 1,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .event_ctl      = NHMEX_U_MSR_PMON_EV_SEL,
-       .perf_ctr       = NHMEX_U_MSR_PMON_CTR,
-       .event_mask     = NHMEX_U_PMON_RAW_EVENT_MASK,
-       .box_ctl        = NHMEX_U_MSR_PMON_GLOBAL_CTL,
-       .ops            = &nhmex_uncore_ops,
-       .format_group   = &nhmex_uncore_ubox_format_group
-};
-
-static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_cbox_formats_attr,
-};
-
-/* msr offset for each instance of cbox */
-static unsigned nhmex_cbox_msr_offsets[] = {
-       0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0,
-};
-
-static struct intel_uncore_type nhmex_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 6,
-       .num_boxes              = 10,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_C0_MSR_PMON_EV_SEL0,
-       .perf_ctr               = NHMEX_C0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
-       .msr_offsets            = nhmex_cbox_msr_offsets,
-       .pair_ctr_ctl           = 1,
-       .ops                    = &nhmex_uncore_ops,
-       .format_group           = &nhmex_uncore_cbox_format_group
-};
-
-static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_type nhmex_uncore_wbox = {
-       .name                   = "wbox",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_W_MSR_PMON_CNT0,
-       .perf_ctr               = NHMEX_W_MSR_PMON_EVT_SEL0,
-       .fixed_ctr              = NHMEX_W_MSR_PMON_FIXED_CTR,
-       .fixed_ctl              = NHMEX_W_MSR_PMON_FIXED_CTL,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_W_MSR_GLOBAL_CTL,
-       .pair_ctr_ctl           = 1,
-       .event_descs            = nhmex_uncore_wbox_events,
-       .ops                    = &nhmex_uncore_ops,
-       .format_group           = &nhmex_uncore_cbox_format_group
-};
-
-static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int ctr, ev_sel;
-
-       ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
-               NHMEX_B_PMON_CTR_SHIFT;
-       ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
-                 NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
-
-       /* events that do not use the match/mask registers */
-       if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
-           (ctr == 2 && ev_sel != 0x4) || ctr == 3)
-               return 0;
-
-       if (box->pmu->pmu_idx == 0)
-               reg1->reg = NHMEX_B0_MSR_MATCH;
-       else
-               reg1->reg = NHMEX_B1_MSR_MATCH;
-       reg1->idx = 0;
-       reg1->config = event->attr.config1;
-       reg2->config = event->attr.config2;
-       return 0;
-}
-
-static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg1->reg, reg1->config);
-               wrmsrl(reg1->reg + 1, reg2->config);
-       }
-       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
-               (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
-}
-
-/*
- * The Bbox has 4 counters, but each counter monitors different events.
- * Use bits 6-7 in the event config to select the counter.
- */
-static struct event_constraint nhmex_uncore_bbox_constraints[] = {
-       EVENT_CONSTRAINT(0 , 1, 0xc0),
-       EVENT_CONSTRAINT(0x40, 2, 0xc0),
-       EVENT_CONSTRAINT(0x80, 4, 0xc0),
-       EVENT_CONSTRAINT(0xc0, 8, 0xc0),
-       EVENT_CONSTRAINT_END,
-};
-
-static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
-       &format_attr_event5.attr,
-       &format_attr_counter.attr,
-       &format_attr_match.attr,
-       &format_attr_mask.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_bbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_bbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_bbox_msr_enable_event,
-       .hw_config              = nhmex_bbox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_bbox = {
-       .name                   = "bbox",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_B0_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_B0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_B_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
-       .msr_offset             = NHMEX_B_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 1,
-       .constraints            = nhmex_uncore_bbox_constraints,
-       .ops                    = &nhmex_uncore_bbox_ops,
-       .format_group           = &nhmex_uncore_bbox_format_group
-};
-
-static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       /* only the TO_R_PROG_EV event uses the match/mask register */
-       if ((hwc->config & NHMEX_PMON_CTL_EV_SEL_MASK) !=
-           NHMEX_S_EVENT_TO_R_PROG_EV)
-               return 0;
-
-       if (box->pmu->pmu_idx == 0)
-               reg1->reg = NHMEX_S0_MSR_MM_CFG;
-       else
-               reg1->reg = NHMEX_S1_MSR_MM_CFG;
-       reg1->idx = 0;
-       reg1->config = event->attr.config1;
-       reg2->config = event->attr.config2;
-       return 0;
-}
-
-static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg1->reg, 0);
-               wrmsrl(reg1->reg + 1, reg1->config);
-               wrmsrl(reg1->reg + 2, reg2->config);
-               wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
-       }
-       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
-}
-
-static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match.attr,
-       &format_attr_mask.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_sbox_format_group = {
-       .name                   = "format",
-       .attrs                  = nhmex_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_sbox_msr_enable_event,
-       .hw_config              = nhmex_sbox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_S0_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_S0_MSR_PMON_CTR0,
-       .event_mask             = NHMEX_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
-       .msr_offset             = NHMEX_S_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 1,
-       .ops                    = &nhmex_uncore_sbox_ops,
-       .format_group           = &nhmex_uncore_sbox_format_group
-};
-
-enum {
-       EXTRA_REG_NHMEX_M_FILTER,
-       EXTRA_REG_NHMEX_M_DSP,
-       EXTRA_REG_NHMEX_M_ISS,
-       EXTRA_REG_NHMEX_M_MAP,
-       EXTRA_REG_NHMEX_M_MSC_THR,
-       EXTRA_REG_NHMEX_M_PGT,
-       EXTRA_REG_NHMEX_M_PLD,
-       EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
-};
-
-static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
-       MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
-       MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
-       MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
-       MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
-       /* event 0xa uses two extra registers */
-       MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
-       MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
-       MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
-       /* events 0xd ~ 0x10 use the same extra register */
-       MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
-       MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
-       MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
-       EVENT_EXTRA_END
-};
-
-/* Nehalem-EX or Westmere-EX? */
-static bool uncore_nhmex;
-
-static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       bool ret = false;
-       u64 mask;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               er = &box->shared_regs[idx];
-               raw_spin_lock_irqsave(&er->lock, flags);
-               if (!atomic_read(&er->ref) || er->config == config) {
-                       atomic_inc(&er->ref);
-                       er->config = config;
-                       ret = true;
-               }
-               raw_spin_unlock_irqrestore(&er->lock, flags);
-
-               return ret;
-       }
-       /*
-        * The ZDP_CTL_FVC MSR has 4 fields which are used to control
-        * events 0xd ~ 0x10. Besides these 4 fields, there are additional
-        * fields which are shared.
-        */
-       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       if (WARN_ON_ONCE(idx >= 4))
-               return false;
-
-       /* mask of the shared fields */
-       if (uncore_nhmex)
-               mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
-       else
-               mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK;
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       /* add mask of the non-shared field if it's in use */
-       if (__BITS_VALUE(atomic_read(&er->ref), idx, 8)) {
-               if (uncore_nhmex)
-                       mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               else
-                       mask |= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       }
-
-       if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
-               atomic_add(1 << (idx * 8), &er->ref);
-               if (uncore_nhmex)
-                       mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
-                               NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               else
-                       mask = WSMEX_M_PMON_ZDP_CTL_FVC_MASK |
-                               WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-               er->config &= ~mask;
-               er->config |= (config & mask);
-               ret = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       return ret;
-}
-
-static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               er = &box->shared_regs[idx];
-               atomic_dec(&er->ref);
-               return;
-       }
-
-       idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-       atomic_sub(1 << (idx * 8), &er->ref);
-}
-
-static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
-       u64 config = reg1->config;
-
-       /* get the non-shared control bits and shift them */
-       idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-       if (uncore_nhmex)
-               config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       else
-               config &= WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
-       if (new_idx > orig_idx) {
-               idx = new_idx - orig_idx;
-               config <<= 3 * idx;
-       } else {
-               idx = orig_idx - new_idx;
-               config >>= 3 * idx;
-       }
-
-       /* add the shared control bits back */
-       if (uncore_nhmex)
-               config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       else
-               config |= WSMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
-       if (modify) {
-               /* adjust the main event selector */
-               if (new_idx > orig_idx)
-                       hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
-               else
-                       hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
-               reg1->config = config;
-               reg1->idx = ~0xff | new_idx;
-       }
-       return config;
-}
-
-static struct event_constraint *
-nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       int i, idx[2], alloc = 0;
-       u64 config1 = reg1->config;
-
-       idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
-       idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
-again:
-       for (i = 0; i < 2; i++) {
-               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
-                       idx[i] = 0xff;
-
-               if (idx[i] == 0xff)
-                       continue;
-
-               if (!nhmex_mbox_get_shared_reg(box, idx[i],
-                               __BITS_VALUE(config1, i, 32)))
-                       goto fail;
-               alloc |= (0x1 << i);
-       }
-
-       /* for the match/mask registers */
-       if (reg2->idx != EXTRA_REG_NONE &&
-           (uncore_box_is_fake(box) || !reg2->alloc) &&
-           !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
-               goto fail;
-
-       /*
-        * If it's a fake box -- as per validate_{group,event}() -- we
-        * shouldn't touch the event state, and we can avoid doing so
-        * because both will only call get_event_constraints() once
-        * per event; this avoids the need for reg->alloc.
-        */
-       if (!uncore_box_is_fake(box)) {
-               if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
-                       nhmex_mbox_alter_er(event, idx[0], true);
-               reg1->alloc |= alloc;
-               if (reg2->idx != EXTRA_REG_NONE)
-                       reg2->alloc = 1;
-       }
-       return NULL;
-fail:
-       if (idx[0] != 0xff && !(alloc & 0x1) &&
-           idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
-               /*
-                * events 0xd ~ 0x10 are functionally identical, but are
-                * controlled by different fields in the ZDP_CTL_FVC
-                * register. If we fail to take one field, try the
-                * remaining 3 choices.
-                */
-               BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
-               idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-               idx[0] = (idx[0] + 1) % 4;
-               idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
-               if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
-                       config1 = nhmex_mbox_alter_er(event, idx[0], false);
-                       goto again;
-               }
-       }
-
-       if (alloc & 0x1)
-               nhmex_mbox_put_shared_reg(box, idx[0]);
-       if (alloc & 0x2)
-               nhmex_mbox_put_shared_reg(box, idx[1]);
-       return &uncore_constraint_empty;
-}
-
-static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-
-       if (uncore_box_is_fake(box))
-               return;
-
-       if (reg1->alloc & 0x1)
-               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
-       if (reg1->alloc & 0x2)
-               nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
-       reg1->alloc = 0;
-
-       if (reg2->alloc) {
-               nhmex_mbox_put_shared_reg(box, reg2->idx);
-               reg2->alloc = 0;
-       }
-}
-
-static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
-{
-       if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
-               return er->idx;
-       return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
-}
-
-static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_type *type = box->pmu->type;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       struct extra_reg *er;
-       unsigned msr;
-       int reg_idx = 0;
-       /*
-        * The mbox events may require at most 2 extra MSRs. But only
-        * the lower 32 bits in these MSRs are significant, so we can use
-        * config1 to pass both MSRs' config.
-        */
-       for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               if (event->attr.config1 & ~er->valid_mask)
-                       return -EINVAL;
-
-               msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
-               if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
-                       return -EINVAL;
-
-               /* always use bits 32~63 to pass the PLD config */
-               if (er->idx == EXTRA_REG_NHMEX_M_PLD)
-                       reg_idx = 1;
-               else if (WARN_ON_ONCE(reg_idx > 0))
-                       return -EINVAL;
-
-               reg1->idx &= ~(0xff << (reg_idx * 8));
-               reg1->reg &= ~(0xffff << (reg_idx * 16));
-               reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
-               reg1->reg |= msr << (reg_idx * 16);
-               reg1->config = event->attr.config1;
-               reg_idx++;
-       }
-       /*
-        * The mbox only provides the ability to perform address
-        * matching for the PLD events.
-        */
-       if (reg_idx == 2) {
-               reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
-               if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
-                       reg2->config = event->attr.config2;
-               else
-                       reg2->config = ~0ULL;
-               if (box->pmu->pmu_idx == 0)
-                       reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
-               else
-                       reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
-       }
-       return 0;
-}
-
-static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
-{
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       u64 config;
-
-       if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
-               return box->shared_regs[idx].config;
-
-       er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       config = er->config;
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-       return config;
-}
-
-static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int idx;
-
-       idx = __BITS_VALUE(reg1->idx, 0, 8);
-       if (idx != 0xff)
-               wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
-                       nhmex_mbox_shared_reg_config(box, idx));
-       idx = __BITS_VALUE(reg1->idx, 1, 8);
-       if (idx != 0xff)
-               wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
-                       nhmex_mbox_shared_reg_config(box, idx));
-
-       if (reg2->idx != EXTRA_REG_NONE) {
-               wrmsrl(reg2->reg, 0);
-               if (reg2->config != ~0ULL) {
-                       wrmsrl(reg2->reg + 1,
-                               reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
-                       wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
-                               (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
-                       wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
-               }
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(count_mode,          count_mode,     "config:2-3");
-DEFINE_UNCORE_FORMAT_ATTR(storage_mode,                storage_mode,   "config:4-5");
-DEFINE_UNCORE_FORMAT_ATTR(wrap_mode,           wrap_mode,      "config:6");
-DEFINE_UNCORE_FORMAT_ATTR(flag_mode,           flag_mode,      "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(inc_sel,             inc_sel,        "config:9-13");
-DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel,                set_flag_sel,   "config:19-21");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cfg_en,       filter_cfg_en,  "config2:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_match,                filter_match,   "config2:0-33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_mask,         filter_mask,    "config2:34-61");
-DEFINE_UNCORE_FORMAT_ATTR(dsp,                 dsp,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(thr,                 thr,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(fvc,                 fvc,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pgt,                 pgt,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(map,                 map,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(iss,                 iss,            "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(pld,                 pld,            "config1:32-63");
-
-static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
-       &format_attr_count_mode.attr,
-       &format_attr_storage_mode.attr,
-       &format_attr_wrap_mode.attr,
-       &format_attr_flag_mode.attr,
-       &format_attr_inc_sel.attr,
-       &format_attr_set_flag_sel.attr,
-       &format_attr_filter_cfg_en.attr,
-       &format_attr_filter_match.attr,
-       &format_attr_filter_mask.attr,
-       &format_attr_dsp.attr,
-       &format_attr_thr.attr,
-       &format_attr_fvc.attr,
-       &format_attr_pgt.attr,
-       &format_attr_map.attr,
-       &format_attr_iss.attr,
-       &format_attr_pld.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_mbox_format_group = {
-       .name           = "format",
-       .attrs          = nhmex_uncore_mbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
-       { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc wsmex_uncore_mbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x5000"),
-       INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x5040"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event   = nhmex_mbox_msr_enable_event,
-       .hw_config      = nhmex_mbox_hw_config,
-       .get_constraint = nhmex_mbox_get_constraint,
-       .put_constraint = nhmex_mbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_mbox = {
-       .name                   = "mbox",
-       .num_counters           = 6,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_M0_MSR_PMU_CTL0,
-       .perf_ctr               = NHMEX_M0_MSR_PMU_CNT0,
-       .event_mask             = NHMEX_M_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_M0_MSR_GLOBAL_CTL,
-       .msr_offset             = NHMEX_M_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 8,
-       .event_descs            = nhmex_uncore_mbox_events,
-       .ops                    = &nhmex_uncore_mbox_ops,
-       .format_group           = &nhmex_uncore_mbox_format_group,
-};
-
-static void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       /* adjust the main event selector and extra register index */
-       if (reg1->idx % 2) {
-               reg1->idx--;
-               hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       } else {
-               reg1->idx++;
-               hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       }
-
-       /* adjust extra register config */
-       switch (reg1->idx % 6) {
-       case 2:
-               /* shift bits 8~15 down to bits 0~7 */
-               reg1->config >>= 8;
-               break;
-       case 3:
-               /* shift bits 0~7 up to bits 8~15 */
-               reg1->config <<= 8;
-               break;
-       }
-}
-
-/*
- * Each rbox has 4 event sets which monitor QPI ports 0~3 or 4~7.
- * An event set consists of 6 events; the 3rd and 4th events in
- * an event set use the same extra register, so an event set uses
- * 5 extra registers.
- */
-static struct event_constraint *
-nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       struct intel_uncore_extra_reg *er;
-       unsigned long flags;
-       int idx, er_idx;
-       u64 config1;
-       bool ok = false;
-
-       if (!uncore_box_is_fake(box) && reg1->alloc)
-               return NULL;
-
-       idx = reg1->idx % 6;
-       config1 = reg1->config;
-again:
-       er_idx = idx;
-       /* the 3rd and 4th events use the same extra register */
-       if (er_idx > 2)
-               er_idx--;
-       er_idx += (reg1->idx / 6) * 5;
-
-       er = &box->shared_regs[er_idx];
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (idx < 2) {
-               if (!atomic_read(&er->ref) || er->config == reg1->config) {
-                       atomic_inc(&er->ref);
-                       er->config = reg1->config;
-                       ok = true;
-               }
-       } else if (idx == 2 || idx == 3) {
-               /*
-                * these two events use different fields in an extra register,
-                * bits 0~7 and bits 8~15 respectively.
-                */
-               u64 mask = 0xff << ((idx - 2) * 8);
-               if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
-                               !((er->config ^ config1) & mask)) {
-                       atomic_add(1 << ((idx - 2) * 8), &er->ref);
-                       er->config &= ~mask;
-                       er->config |= config1 & mask;
-                       ok = true;
-               }
-       } else {
-               if (!atomic_read(&er->ref) ||
-                               (er->config == (hwc->config >> 32) &&
-                                er->config1 == reg1->config &&
-                                er->config2 == reg2->config)) {
-                       atomic_inc(&er->ref);
-                       er->config = (hwc->config >> 32);
-                       er->config1 = reg1->config;
-                       er->config2 = reg2->config;
-                       ok = true;
-               }
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (!ok) {
-               /*
-                * The Rbox events always come in pairs. The paired
-                * events are functionally identical, but use different
-                * extra registers. If we fail to take an extra
-                * register, try the alternative.
-                */
-               idx ^= 1;
-               if (idx != reg1->idx % 6) {
-                       if (idx == 2)
-                               config1 >>= 8;
-                       else if (idx == 3)
-                               config1 <<= 8;
-                       goto again;
-               }
-       } else {
-               if (!uncore_box_is_fake(box)) {
-                       if (idx != reg1->idx % 6)
-                               nhmex_rbox_alter_er(box, event);
-                       reg1->alloc = 1;
-               }
-               return NULL;
-       }
-       return &uncore_constraint_empty;
-}
-
-static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct intel_uncore_extra_reg *er;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       int idx, er_idx;
-
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       idx = reg1->idx % 6;
-       er_idx = idx;
-       if (er_idx > 2)
-               er_idx--;
-       er_idx += (reg1->idx / 6) * 5;
-
-       er = &box->shared_regs[er_idx];
-       if (idx == 2 || idx == 3)
-               atomic_sub(1 << ((idx - 2) * 8), &er->ref);
-       else
-               atomic_dec(&er->ref);
-
-       reg1->alloc = 0;
-}
-
-static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
-       int idx;
-
-       idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
-               NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
-       if (idx >= 0x18)
-               return -EINVAL;
-
-       reg1->idx = idx;
-       reg1->config = event->attr.config1;
-
-       switch (idx % 6) {
-       case 4:
-       case 5:
-               hwc->config |= event->attr.config & (~0ULL << 32);
-               reg2->config = event->attr.config2;
-               break;
-       }
-       return 0;
-}
-
-static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-       int idx, port;
-
-       idx = reg1->idx;
-       port = idx / 6 + box->pmu->pmu_idx * 4;
-
-       switch (idx % 6) {
-       case 0:
-               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config);
-               break;
-       case 1:
-               wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config);
-               break;
-       case 2:
-       case 3:
-               wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port),
-                       uncore_shared_reg_config(box, 2 + (idx / 6) * 5));
-               break;
-       case 4:
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port),
-                       hwc->config >> 32);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config);
-               break;
-       case 5:
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port),
-                       hwc->config >> 32);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config);
-               wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config);
-               break;
-       }
-
-       wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
-               (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
-}
-
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config1:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
-DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
-DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
-
-static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
-       &format_attr_event5.attr,
-       &format_attr_xbr_mm_cfg.attr,
-       &format_attr_xbr_match.attr,
-       &format_attr_xbr_mask.attr,
-       &format_attr_qlx_cfg.attr,
-       &format_attr_iperf_cfg.attr,
-       NULL,
-};
-
-static struct attribute_group nhmex_uncore_rbox_format_group = {
-       .name = "format",
-       .attrs = nhmex_uncore_rbox_formats_attr,
-};
-
-static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
-       INTEL_UNCORE_EVENT_DESC(qpi0_flit_send,         "event=0x0,iperf_cfg=0x80000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_filt_send,         "event=0x6,iperf_cfg=0x80000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt,         "event=0x0,iperf_cfg=0x40000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt,         "event=0x6,iperf_cfg=0x40000000"),
-       INTEL_UNCORE_EVENT_DESC(qpi0_date_response,     "event=0x0,iperf_cfg=0xc4"),
-       INTEL_UNCORE_EVENT_DESC(qpi1_date_response,     "event=0x6,iperf_cfg=0xc4"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
-       NHMEX_UNCORE_OPS_COMMON_INIT(),
-       .enable_event           = nhmex_rbox_msr_enable_event,
-       .hw_config              = nhmex_rbox_hw_config,
-       .get_constraint         = nhmex_rbox_get_constraint,
-       .put_constraint         = nhmex_rbox_put_constraint,
-};
-
-static struct intel_uncore_type nhmex_uncore_rbox = {
-       .name                   = "rbox",
-       .num_counters           = 8,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = NHMEX_R_MSR_PMON_CTL0,
-       .perf_ctr               = NHMEX_R_MSR_PMON_CNT0,
-       .event_mask             = NHMEX_R_PMON_RAW_EVENT_MASK,
-       .box_ctl                = NHMEX_R_MSR_GLOBAL_CTL,
-       .msr_offset             = NHMEX_R_MSR_OFFSET,
-       .pair_ctr_ctl           = 1,
-       .num_shared_regs        = 20,
-       .event_descs            = nhmex_uncore_rbox_events,
-       .ops                    = &nhmex_uncore_rbox_ops,
-       .format_group           = &nhmex_uncore_rbox_format_group
-};
-
-static struct intel_uncore_type *nhmex_msr_uncores[] = {
-       &nhmex_uncore_ubox,
-       &nhmex_uncore_cbox,
-       &nhmex_uncore_bbox,
-       &nhmex_uncore_sbox,
-       &nhmex_uncore_mbox,
-       &nhmex_uncore_rbox,
-       &nhmex_uncore_wbox,
-       NULL,
-};
-
-void nhmex_uncore_cpu_init(void)
-{
-       if (boot_cpu_data.x86_model == 46)
-               uncore_nhmex = true;
-       else
-               nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events;
-       if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = nhmex_msr_uncores;
-}
-/* end of Nehalem-EX uncore support */
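
The comment above nhmex_rbox_get_constraint() in the deleted file notes that the 3rd and 4th events of each rbox event set share one extra register, so a set of 6 events consumes 5 shared registers. The arithmetic can be reproduced with a small stand-alone sketch (plain user-space C, hypothetical and not part of the kernel; it only mirrors the idx/er_idx computation shown in the deleted code):

/*
 * Stand-alone sketch of the rbox shared-register mapping: within a
 * set of 6 events, positions 2 and 3 (the 3rd and 4th events) share
 * one register, so each set maps onto 5 shared registers, and the
 * second set of a box starts at offset 5.
 */
#include <stdio.h>

static int rbox_er_idx(int idx)
{
        int pos = idx % 6;
        int er_idx = (pos > 2) ? pos - 1 : pos;   /* 3rd/4th collapse */

        return er_idx + (idx / 6) * 5;            /* 5 regs per set */
}

int main(void)
{
        int idx;

        for (idx = 0; idx < 12; idx++)
                printf("event idx %2d -> shared reg %d\n", idx, rbox_er_idx(idx));
        return 0;
}

Event positions 2 and 3 print the same shared register, which is why get/put_constraint above track those two users in separate bytes of er->ref rather than with a plain reference count.
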
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
deleted file mode 100644 (file)
index 2bd030d..0000000
+++ /dev/null
@@ -1,717 +0,0 @@
-/* Nehalem/SandyBridge/Haswell uncore support */
-#include "perf_event_intel_uncore.h"
-
-/* Uncore IMC PCI IDs */
-#define PCI_DEVICE_ID_INTEL_SNB_IMC    0x0100
-#define PCI_DEVICE_ID_INTEL_IVB_IMC    0x0154
-#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150
-#define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
-#define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
-#define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
-
-/* SNB event control */
-#define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
-#define SNB_UNC_CTL_UMASK_MASK                 0x0000ff00
-#define SNB_UNC_CTL_EDGE_DET                   (1 << 18)
-#define SNB_UNC_CTL_EN                         (1 << 22)
-#define SNB_UNC_CTL_INVERT                     (1 << 23)
-#define SNB_UNC_CTL_CMASK_MASK                 0x1f000000
-#define NHM_UNC_CTL_CMASK_MASK                 0xff000000
-#define NHM_UNC_FIXED_CTR_CTL_EN               (1 << 0)
-
-#define SNB_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
-                                                SNB_UNC_CTL_UMASK_MASK | \
-                                                SNB_UNC_CTL_EDGE_DET | \
-                                                SNB_UNC_CTL_INVERT | \
-                                                SNB_UNC_CTL_CMASK_MASK)
-
-#define NHM_UNC_RAW_EVENT_MASK                 (SNB_UNC_CTL_EV_SEL_MASK | \
-                                                SNB_UNC_CTL_UMASK_MASK | \
-                                                SNB_UNC_CTL_EDGE_DET | \
-                                                SNB_UNC_CTL_INVERT | \
-                                                NHM_UNC_CTL_CMASK_MASK)
-
-/* SNB global control register */
-#define SNB_UNC_PERF_GLOBAL_CTL                 0x391
-#define SNB_UNC_FIXED_CTR_CTRL                  0x394
-#define SNB_UNC_FIXED_CTR                       0x395
-
-/* SNB uncore global control */
-#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1)
-#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29)
-
-/* SNB Cbo register */
-#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700
-#define SNB_UNC_CBO_0_PER_CTR0                  0x706
-#define SNB_UNC_CBO_MSR_OFFSET                  0x10
-
-/* SNB ARB register */
-#define SNB_UNC_ARB_PER_CTR0                   0x3b0
-#define SNB_UNC_ARB_PERFEVTSEL0                        0x3b2
-#define SNB_UNC_ARB_MSR_OFFSET                 0x10
-
-/* NHM global control register */
-#define NHM_UNC_PERF_GLOBAL_CTL                 0x391
-#define NHM_UNC_FIXED_CTR                       0x394
-#define NHM_UNC_FIXED_CTR_CTRL                  0x395
-
-/* NHM uncore global control */
-#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1)
-#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32)
-
-/* NHM uncore register */
-#define NHM_UNC_PERFEVTSEL0                     0x3c0
-#define NHM_UNC_UNCORE_PMC0                     0x3b0
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
-
-/* Sandy Bridge uncore support */
-static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
-       else
-               wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
-}
-
-static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       wrmsrl(event->hw.config_base, 0);
-}
-
-static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       if (box->pmu->pmu_idx == 0) {
-               wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
-                       SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
-       }
-}
-
-static struct uncore_event_desc snb_uncore_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
-       { /* end: all zeroes */ },
-};
-
-static struct attribute *snb_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask5.attr,
-       NULL,
-};
-
-static struct attribute_group snb_uncore_format_group = {
-       .name           = "format",
-       .attrs          = snb_uncore_formats_attr,
-};
-
-static struct intel_uncore_ops snb_uncore_msr_ops = {
-       .init_box       = snb_uncore_msr_init_box,
-       .disable_event  = snb_uncore_msr_disable_event,
-       .enable_event   = snb_uncore_msr_enable_event,
-       .read_counter   = uncore_msr_read_counter,
-};
-
-static struct event_constraint snb_uncore_arb_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snb_uncore_cbox = {
-       .name           = "cbox",
-       .num_counters   = 2,
-       .num_boxes      = 4,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNB_UNC_CBO_0_PER_CTR0,
-       .event_ctl      = SNB_UNC_CBO_0_PERFEVTSEL0,
-       .fixed_ctr      = SNB_UNC_FIXED_CTR,
-       .fixed_ctl      = SNB_UNC_FIXED_CTR_CTRL,
-       .single_fixed   = 1,
-       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
-       .msr_offset     = SNB_UNC_CBO_MSR_OFFSET,
-       .ops            = &snb_uncore_msr_ops,
-       .format_group   = &snb_uncore_format_group,
-       .event_descs    = snb_uncore_events,
-};
-
-static struct intel_uncore_type snb_uncore_arb = {
-       .name           = "arb",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .perf_ctr       = SNB_UNC_ARB_PER_CTR0,
-       .event_ctl      = SNB_UNC_ARB_PERFEVTSEL0,
-       .event_mask     = SNB_UNC_RAW_EVENT_MASK,
-       .msr_offset     = SNB_UNC_ARB_MSR_OFFSET,
-       .constraints    = snb_uncore_arb_constraints,
-       .ops            = &snb_uncore_msr_ops,
-       .format_group   = &snb_uncore_format_group,
-};
-
-static struct intel_uncore_type *snb_msr_uncores[] = {
-       &snb_uncore_cbox,
-       &snb_uncore_arb,
-       NULL,
-};
-
-void snb_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = snb_msr_uncores;
-       if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-}
-
-enum {
-       SNB_PCI_UNCORE_IMC,
-};
-
-static struct uncore_event_desc snb_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(data_reads,  "event=0x01"),
-       INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
-
-       INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
-       INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
-
-       { /* end: all zeroes */ },
-};
-
-#define SNB_UNCORE_PCI_IMC_EVENT_MASK          0xff
-#define SNB_UNCORE_PCI_IMC_BAR_OFFSET          0x48
-
-/* page size multiple covering all config regs */
-#define SNB_UNCORE_PCI_IMC_MAP_SIZE            0x6000
-
-#define SNB_UNCORE_PCI_IMC_DATA_READS          0x1
-#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE     0x5050
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES         0x2
-#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE    0x5054
-#define SNB_UNCORE_PCI_IMC_CTR_BASE            SNB_UNCORE_PCI_IMC_DATA_READS_BASE
-
-static struct attribute *snb_uncore_imc_formats_attr[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-
-static struct attribute_group snb_uncore_imc_format_group = {
-       .name = "format",
-       .attrs = snb_uncore_imc_formats_attr,
-};
-
-static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
-       resource_size_t addr;
-       u32 pci_dword;
-
-       pci_read_config_dword(pdev, where, &pci_dword);
-       addr = pci_dword;
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-       pci_read_config_dword(pdev, where + 4, &pci_dword);
-       addr |= ((resource_size_t)pci_dword << 32);
-#endif
-
-       addr &= ~(PAGE_SIZE - 1);
-
-       box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
-       box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
-}
-
-static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
-{}
-
-static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{}
-
-static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
-}
-
-/*
- * Custom event_init() function because we define our own fixed,
- * free-running counters, so we do not want to conflict with the
- * generic uncore logic. This also simplifies processing.
- */
-static int snb_uncore_imc_event_init(struct perf_event *event)
-{
-       struct intel_uncore_pmu *pmu;
-       struct intel_uncore_box *box;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
-       int idx, base;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       pmu = uncore_event_to_pmu(event);
-       /* no device found for this pmu */
-       if (pmu->func_id < 0)
-               return -ENOENT;
-
-       /* Sampling not supported yet */
-       if (hwc->sample_period)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       /*
-        * Place all uncore events for a particular physical package
-        * onto a single CPU.
-        */
-       if (event->cpu < 0)
-               return -EINVAL;
-
-       /* check only supported bits are set */
-       if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
-               return -EINVAL;
-
-       box = uncore_pmu_to_box(pmu, event->cpu);
-       if (!box || box->cpu < 0)
-               return -EINVAL;
-
-       event->cpu = box->cpu;
-
-       event->hw.idx = -1;
-       event->hw.last_tag = ~0ULL;
-       event->hw.extra_reg.idx = EXTRA_REG_NONE;
-       event->hw.branch_reg.idx = EXTRA_REG_NONE;
-       /*
-        * check that the event is on the whitelist, which determines
-        * the counter to use
-        */
-       switch (cfg) {
-       case SNB_UNCORE_PCI_IMC_DATA_READS:
-               base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
-               idx = UNCORE_PMC_IDX_FIXED;
-               break;
-       case SNB_UNCORE_PCI_IMC_DATA_WRITES:
-               base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
-               idx = UNCORE_PMC_IDX_FIXED + 1;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       /* must be done before validate_group */
-       event->hw.event_base = base;
-       event->hw.config = cfg;
-       event->hw.idx = idx;
-
-       /* no group validation needed, we have free-running counters */
-
-       return 0;
-}
-
-static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return 0;
-}
-
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       u64 count;
-
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
-       event->hw.state = 0;
-       box->n_active++;
-
-       list_add_tail(&event->active_entry, &box->active_list);
-
-       count = snb_uncore_imc_read_counter(box, event);
-       local64_set(&event->hw.prev_count, count);
-
-       if (box->n_active == 1)
-               uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!(hwc->state & PERF_HES_STOPPED)) {
-               box->n_active--;
-
-               WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-               hwc->state |= PERF_HES_STOPPED;
-
-               list_del(&event->active_entry);
-
-               if (box->n_active == 0)
-                       uncore_pmu_cancel_hrtimer(box);
-       }
-
-       if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-               /*
-                * Drain the remaining delta count out of an event
-                * that we are disabling:
-                */
-               uncore_perf_event_update(box, event);
-               hwc->state |= PERF_HES_UPTODATE;
-       }
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (!box)
-               return -ENODEV;
-
-       hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-       if (!(flags & PERF_EF_START))
-               hwc->state |= PERF_HES_ARCH;
-
-       snb_uncore_imc_event_start(event, 0);
-
-       box->n_events++;
-
-       return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int i;
-
-       snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < box->n_events; i++) {
-               if (event == box->event_list[i]) {
-                       --box->n_events;
-                       break;
-               }
-       }
-}
-
-int snb_pci2phy_map_init(int devid)
-{
-       struct pci_dev *dev = NULL;
-       struct pci2phy_map *map;
-       int bus, segment;
-
-       dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
-       if (!dev)
-               return -ENOTTY;
-
-       bus = dev->bus->number;
-       segment = pci_domain_nr(dev->bus);
-
-       raw_spin_lock(&pci2phy_map_lock);
-       map = __find_pci2phy_map(segment);
-       if (!map) {
-               raw_spin_unlock(&pci2phy_map_lock);
-               pci_dev_put(dev);
-               return -ENOMEM;
-       }
-       map->pbus_to_physid[bus] = 0;
-       raw_spin_unlock(&pci2phy_map_lock);
-
-       pci_dev_put(dev);
-
-       return 0;
-}
-
-static struct pmu snb_uncore_imc_pmu = {
-       .task_ctx_nr    = perf_invalid_context,
-       .event_init     = snb_uncore_imc_event_init,
-       .add            = snb_uncore_imc_event_add,
-       .del            = snb_uncore_imc_event_del,
-       .start          = snb_uncore_imc_event_start,
-       .stop           = snb_uncore_imc_event_stop,
-       .read           = uncore_pmu_event_read,
-};
-
-static struct intel_uncore_ops snb_uncore_imc_ops = {
-       .init_box       = snb_uncore_imc_init_box,
-       .enable_box     = snb_uncore_imc_enable_box,
-       .disable_box    = snb_uncore_imc_disable_box,
-       .disable_event  = snb_uncore_imc_disable_event,
-       .enable_event   = snb_uncore_imc_enable_event,
-       .hw_config      = snb_uncore_imc_hw_config,
-       .read_counter   = snb_uncore_imc_read_counter,
-};
-
-static struct intel_uncore_type snb_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .fixed_ctr_bits = 32,
-       .fixed_ctr      = SNB_UNCORE_PCI_IMC_CTR_BASE,
-       .event_descs    = snb_uncore_imc_events,
-       .format_group   = &snb_uncore_imc_format_group,
-       .perf_ctr       = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
-       .event_mask     = SNB_UNCORE_PCI_IMC_EVENT_MASK,
-       .ops            = &snb_uncore_imc_ops,
-       .pmu            = &snb_uncore_imc_pmu,
-};
-
-static struct intel_uncore_type *snb_pci_uncores[] = {
-       [SNB_PCI_UNCORE_IMC]    = &snb_uncore_imc,
-       NULL,
-};
-
-static const struct pci_device_id snb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id ivb_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_E3_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id hsw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id bdw_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static const struct pci_device_id skl_uncore_pci_ids[] = {
-       { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
-       },
-       { /* end: all zeroes */ },
-};
-
-static struct pci_driver snb_uncore_pci_driver = {
-       .name           = "snb_uncore",
-       .id_table       = snb_uncore_pci_ids,
-};
-
-static struct pci_driver ivb_uncore_pci_driver = {
-       .name           = "ivb_uncore",
-       .id_table       = ivb_uncore_pci_ids,
-};
-
-static struct pci_driver hsw_uncore_pci_driver = {
-       .name           = "hsw_uncore",
-       .id_table       = hsw_uncore_pci_ids,
-};
-
-static struct pci_driver bdw_uncore_pci_driver = {
-       .name           = "bdw_uncore",
-       .id_table       = bdw_uncore_pci_ids,
-};
-
-static struct pci_driver skl_uncore_pci_driver = {
-       .name           = "skl_uncore",
-       .id_table       = skl_uncore_pci_ids,
-};
-
-struct imc_uncore_pci_dev {
-       __u32 pci_id;
-       struct pci_driver *driver;
-};
-#define IMC_DEV(a, d) \
-       { .pci_id = PCI_DEVICE_ID_INTEL_##a, .driver = (d) }
-
-static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
-       IMC_DEV(SNB_IMC, &snb_uncore_pci_driver),
-       IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */
-       IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
-       IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
-       IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
-       IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
-       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
-       {  /* end marker */ }
-};
-
-
-#define for_each_imc_pci_id(x, t) \
-       for (x = (t); (x)->pci_id; x++)
-
-static struct pci_driver *imc_uncore_find_dev(void)
-{
-       const struct imc_uncore_pci_dev *p;
-       int ret;
-
-       for_each_imc_pci_id(p, desktop_imc_pci_ids) {
-               ret = snb_pci2phy_map_init(p->pci_id);
-               if (ret == 0)
-                       return p->driver;
-       }
-       return NULL;
-}
-
-static int imc_uncore_pci_init(void)
-{
-       struct pci_driver *imc_drv = imc_uncore_find_dev();
-
-       if (!imc_drv)
-               return -ENODEV;
-
-       uncore_pci_uncores = snb_pci_uncores;
-       uncore_pci_driver = imc_drv;
-
-       return 0;
-}
-
-int snb_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int ivb_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-int hsw_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int bdw_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-int skl_uncore_pci_init(void)
-{
-       return imc_uncore_pci_init();
-}
-
-/* end of Sandy Bridge uncore support */
-
-/* Nehalem uncore support */
-static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
-}
-
-static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
-}
-
-static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       if (hwc->idx < UNCORE_PMC_IDX_FIXED)
-               wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
-       else
-               wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
-}
-
-static struct attribute *nhm_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask8.attr,
-       NULL,
-};
-
-static struct attribute_group nhm_uncore_format_group = {
-       .name = "format",
-       .attrs = nhm_uncore_formats_attr,
-};
-
-static struct uncore_event_desc nhm_uncore_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"),
-       INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"),
-       INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"),
-       { /* end: all zeroes */ },
-};
-
-static struct intel_uncore_ops nhm_uncore_msr_ops = {
-       .disable_box    = nhm_uncore_msr_disable_box,
-       .enable_box     = nhm_uncore_msr_enable_box,
-       .disable_event  = snb_uncore_msr_disable_event,
-       .enable_event   = nhm_uncore_msr_enable_event,
-       .read_counter   = uncore_msr_read_counter,
-};
-
-static struct intel_uncore_type nhm_uncore = {
-       .name           = "",
-       .num_counters   = 8,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .event_ctl      = NHM_UNC_PERFEVTSEL0,
-       .perf_ctr       = NHM_UNC_UNCORE_PMC0,
-       .fixed_ctr      = NHM_UNC_FIXED_CTR,
-       .fixed_ctl      = NHM_UNC_FIXED_CTR_CTRL,
-       .event_mask     = NHM_UNC_RAW_EVENT_MASK,
-       .event_descs    = nhm_uncore_events,
-       .ops            = &nhm_uncore_msr_ops,
-       .format_group   = &nhm_uncore_format_group,
-};
-
-static struct intel_uncore_type *nhm_msr_uncores[] = {
-       &nhm_uncore,
-       NULL,
-};
-
-void nhm_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = nhm_msr_uncores;
-}
-
-/* end of Nehalem uncore support */
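
The data_reads.scale and data_writes.scale strings in snb_uncore_imc_events above ("6.103515625e-5", unit "MiB") are consistent with each free-running IMC count being treated as one 64-byte cache line, since 64 / 2^20 is exactly that value; the 64-byte interpretation is an assumption here, only the scale constant itself comes from the code. A trivial stand-alone check (plain C, not kernel code):

/*
 * 64 bytes per count, converted to MiB: 64 / (1024 * 1024).
 * Prints 6.103515625e-05, matching the scale strings above.
 */
#include <stdio.h>

int main(void)
{
        printf("%.10g\n", 64.0 / (1024.0 * 1024.0));
        return 0;
}
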
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
deleted file mode 100644 (file)
index 33acb88..0000000
+++ /dev/null
@@ -1,3126 +0,0 @@
-/* SandyBridge-EP/IvyTown uncore support */
-#include "perf_event_intel_uncore.h"
-
-
-/* SNB-EP Box level control */
-#define SNBEP_PMON_BOX_CTL_RST_CTRL    (1 << 0)
-#define SNBEP_PMON_BOX_CTL_RST_CTRS    (1 << 1)
-#define SNBEP_PMON_BOX_CTL_FRZ         (1 << 8)
-#define SNBEP_PMON_BOX_CTL_FRZ_EN      (1 << 16)
-#define SNBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
-                                        SNBEP_PMON_BOX_CTL_RST_CTRS | \
-                                        SNBEP_PMON_BOX_CTL_FRZ_EN)
-/* SNB-EP event control */
-#define SNBEP_PMON_CTL_EV_SEL_MASK     0x000000ff
-#define SNBEP_PMON_CTL_UMASK_MASK      0x0000ff00
-#define SNBEP_PMON_CTL_RST             (1 << 17)
-#define SNBEP_PMON_CTL_EDGE_DET                (1 << 18)
-#define SNBEP_PMON_CTL_EV_SEL_EXT      (1 << 21)
-#define SNBEP_PMON_CTL_EN              (1 << 22)
-#define SNBEP_PMON_CTL_INVERT          (1 << 23)
-#define SNBEP_PMON_CTL_TRESH_MASK      0xff000000
-#define SNBEP_PMON_RAW_EVENT_MASK      (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                        SNBEP_PMON_CTL_UMASK_MASK | \
-                                        SNBEP_PMON_CTL_EDGE_DET | \
-                                        SNBEP_PMON_CTL_INVERT | \
-                                        SNBEP_PMON_CTL_TRESH_MASK)
-
-/* SNB-EP Ubox event control */
-#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK                0x1f000000
-#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK                \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_UMASK_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-
-#define SNBEP_CBO_PMON_CTL_TID_EN              (1 << 19)
-#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK      (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* SNB-EP PCU event control */
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK    0x0000c000
-#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK      0x1f000000
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT      (1 << 30)
-#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET    (1 << 31)
-#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_RAW_EVENT_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT)
-
-/* SNB-EP pci control register */
-#define SNBEP_PCI_PMON_BOX_CTL                 0xf4
-#define SNBEP_PCI_PMON_CTL0                    0xd8
-/* SNB-EP pci counter register */
-#define SNBEP_PCI_PMON_CTR0                    0xa0
-
-/* SNB-EP home agent register */
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0       0x40
-#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1       0x44
-#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH      0x48
-/* SNB-EP memory controller register */
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL                0xf0
-#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR                0xd0
-/* SNB-EP QPI register */
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0         0x228
-#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1         0x22c
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0          0x238
-#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1          0x23c
-
-/* SNB-EP Ubox register */
-#define SNBEP_U_MSR_PMON_CTR0                  0xc16
-#define SNBEP_U_MSR_PMON_CTL0                  0xc10
-
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL                0xc08
-#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR                0xc09
-
-/* SNB-EP Cbo register */
-#define SNBEP_C0_MSR_PMON_CTR0                 0xd16
-#define SNBEP_C0_MSR_PMON_CTL0                 0xd10
-#define SNBEP_C0_MSR_PMON_BOX_CTL              0xd04
-#define SNBEP_C0_MSR_PMON_BOX_FILTER           0xd14
-#define SNBEP_CBO_MSR_OFFSET                   0x20
-
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_TID      0x1f
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_NID      0x3fc00
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE    0x7c0000
-#define SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC      0xff800000
-
-#define SNBEP_CBO_EVENT_EXTRA_REG(e, m, i) {   \
-       .event = (e),                           \
-       .msr = SNBEP_C0_MSR_PMON_BOX_FILTER,    \
-       .config_mask = (m),                     \
-       .idx = (i)                              \
-}
-
-/* SNB-EP PCU register */
-#define SNBEP_PCU_MSR_PMON_CTR0                        0xc36
-#define SNBEP_PCU_MSR_PMON_CTL0                        0xc30
-#define SNBEP_PCU_MSR_PMON_BOX_CTL             0xc24
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER          0xc34
-#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK     0xffffffff
-#define SNBEP_PCU_MSR_CORE_C3_CTR              0x3fc
-#define SNBEP_PCU_MSR_CORE_C6_CTR              0x3fd
-
-/* IVBEP event control */
-#define IVBEP_PMON_BOX_CTL_INT         (SNBEP_PMON_BOX_CTL_RST_CTRL | \
-                                        SNBEP_PMON_BOX_CTL_RST_CTRS)
-#define IVBEP_PMON_RAW_EVENT_MASK              (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                        SNBEP_PMON_CTL_UMASK_MASK | \
-                                        SNBEP_PMON_CTL_EDGE_DET | \
-                                        SNBEP_PMON_CTL_TRESH_MASK)
-/* IVBEP Ubox */
-#define IVBEP_U_MSR_PMON_GLOBAL_CTL            0xc00
-#define IVBEP_U_PMON_GLOBAL_FRZ_ALL            (1 << 31)
-#define IVBEP_U_PMON_GLOBAL_UNFRZ_ALL          (1 << 29)
-
-#define IVBEP_U_MSR_PMON_RAW_EVENT_MASK        \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_UMASK_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
-/* IVBEP Cbo */
-#define IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK              (IVBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x1fULL << 0)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 5)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x3fULL << 17)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
-#define IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
-
-/* IVBEP home agent */
-#define IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST                (1 << 16)
-#define IVBEP_HA_PCI_PMON_RAW_EVENT_MASK               \
-                               (IVBEP_PMON_RAW_EVENT_MASK | \
-                                IVBEP_HA_PCI_PMON_CTL_Q_OCC_RST)
-/* IVBEP PCU */
-#define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK      \
-                               (SNBEP_PMON_CTL_EV_SEL_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-/* IVBEP QPI */
-#define IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK      \
-                               (IVBEP_PMON_RAW_EVENT_MASK | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT)
-
-#define __BITS_VALUE(x, i, n)  ((typeof(x))(((x) >> ((i) * (n))) & \
-                               ((1ULL << (n)) - 1)))
-
-/* Haswell-EP Ubox */
-#define HSWEP_U_MSR_PMON_CTR0                  0x709
-#define HSWEP_U_MSR_PMON_CTL0                  0x705
-#define HSWEP_U_MSR_PMON_FILTER                        0x707
-
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL                0x703
-#define HSWEP_U_MSR_PMON_UCLK_FIXED_CTR                0x704
-
-#define HSWEP_U_MSR_PMON_BOX_FILTER_TID                (0x1 << 0)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_CID                (0x1fULL << 1)
-#define HSWEP_U_MSR_PMON_BOX_FILTER_MASK \
-                                       (HSWEP_U_MSR_PMON_BOX_FILTER_TID | \
-                                        HSWEP_U_MSR_PMON_BOX_FILTER_CID)
-
-/* Haswell-EP CBo */
-#define HSWEP_C0_MSR_PMON_CTR0                 0xe08
-#define HSWEP_C0_MSR_PMON_CTL0                 0xe01
-#define HSWEP_C0_MSR_PMON_BOX_CTL                      0xe00
-#define HSWEP_C0_MSR_PMON_BOX_FILTER0          0xe05
-#define HSWEP_CBO_MSR_OFFSET                   0x10
-
-
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_TID              (0x3fULL << 0)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK     (0xfULL << 6)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE    (0x7fULL << 17)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NID              (0xffffULL << 32)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC              (0x1ffULL << 52)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_C6               (0x1ULL << 61)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_NC               (0x1ULL << 62)
-#define HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC     (0x1ULL << 63)
-
-
-/* Haswell-EP Sbox */
-#define HSWEP_S0_MSR_PMON_CTR0                 0x726
-#define HSWEP_S0_MSR_PMON_CTL0                 0x721
-#define HSWEP_S0_MSR_PMON_BOX_CTL                      0x720
-#define HSWEP_SBOX_MSR_OFFSET                  0xa
-#define HSWEP_S_MSR_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                SNBEP_CBO_PMON_CTL_TID_EN)
-
-/* Haswell-EP PCU */
-#define HSWEP_PCU_MSR_PMON_CTR0                        0x717
-#define HSWEP_PCU_MSR_PMON_CTL0                        0x711
-#define HSWEP_PCU_MSR_PMON_BOX_CTL             0x710
-#define HSWEP_PCU_MSR_PMON_BOX_FILTER          0x715
-
-/* KNL Ubox */
-#define KNL_U_MSR_PMON_RAW_EVENT_MASK \
-                                       (SNBEP_U_MSR_PMON_RAW_EVENT_MASK | \
-                                               SNBEP_CBO_PMON_CTL_TID_EN)
-/* KNL CHA */
-#define KNL_CHA_MSR_OFFSET                     0xc
-#define KNL_CHA_MSR_PMON_CTL_QOR               (1 << 16)
-#define KNL_CHA_MSR_PMON_RAW_EVENT_MASK \
-                                       (SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK | \
-                                        KNL_CHA_MSR_PMON_CTL_QOR)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_TID                0x1ff
-#define KNL_CHA_MSR_PMON_BOX_FILTER_STATE      (7 << 18)
-#define KNL_CHA_MSR_PMON_BOX_FILTER_OP         (0xfffffe2aULL << 32)
-
-/* KNL EDC/MC UCLK */
-#define KNL_UCLK_MSR_PMON_CTR0_LOW             0x400
-#define KNL_UCLK_MSR_PMON_CTL0                 0x420
-#define KNL_UCLK_MSR_PMON_BOX_CTL              0x430
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW       0x44c
-#define KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL       0x454
-#define KNL_PMON_FIXED_CTL_EN                  0x1
-
-/* KNL EDC */
-#define KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW                0xa00
-#define KNL_EDC0_ECLK_MSR_PMON_CTL0            0xa20
-#define KNL_EDC0_ECLK_MSR_PMON_BOX_CTL         0xa30
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW  0xa3c
-#define KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL  0xa44
-
-/* KNL MC */
-#define KNL_MC0_CH0_MSR_PMON_CTR0_LOW          0xb00
-#define KNL_MC0_CH0_MSR_PMON_CTL0              0xb20
-#define KNL_MC0_CH0_MSR_PMON_BOX_CTL           0xb30
-#define KNL_MC0_CH0_MSR_PMON_FIXED_LOW         0xb3c
-#define KNL_MC0_CH0_MSR_PMON_FIXED_CTL         0xb44
-
-/* KNL IRP */
-#define KNL_IRP_PCI_PMON_BOX_CTL               0xf0
-#define KNL_IRP_PCI_PMON_RAW_EVENT_MASK                (SNBEP_PMON_RAW_EVENT_MASK | \
-                                                KNL_CHA_MSR_PMON_CTL_QOR)
-/* KNL PCU */
-#define KNL_PCU_PMON_CTL_EV_SEL_MASK           0x0000007f
-#define KNL_PCU_PMON_CTL_USE_OCC_CTR           (1 << 7)
-#define KNL_PCU_MSR_PMON_CTL_TRESH_MASK                0x3f000000
-#define KNL_PCU_MSR_PMON_RAW_EVENT_MASK        \
-                               (KNL_PCU_PMON_CTL_EV_SEL_MASK | \
-                                KNL_PCU_PMON_CTL_USE_OCC_CTR | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
-                                SNBEP_PMON_CTL_EDGE_DET | \
-                                SNBEP_CBO_PMON_CTL_TID_EN | \
-                                SNBEP_PMON_CTL_EV_SEL_EXT | \
-                                SNBEP_PMON_CTL_INVERT | \
-                                KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
-                                SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
-
-DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
-DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
-DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
-DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
-DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
-DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
-DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
-DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
-DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
-DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
-DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
-DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
-DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
-DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
-DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
-DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
-DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
-DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
-DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
-DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
-DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
-DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
-DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
-DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
-DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
-DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
-DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
-DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
-
-static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-       u32 config = 0;
-
-       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
-               config |= SNBEP_PMON_BOX_CTL_FRZ;
-               pci_write_config_dword(pdev, box_ctl, config);
-       }
-}
-
-static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-       u32 config = 0;
-
-       if (!pci_read_config_dword(pdev, box_ctl, &config)) {
-               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
-               pci_write_config_dword(pdev, box_ctl, config);
-       }
-}
-
-static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config);
-}
-
-static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
-       pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
-
-       return count;
-}
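For reference: the two config-space reads above fill the low and high halves of 'count' in place, relying on x86's little-endian layout, where (u32 *)&count addresses bits 0-31 and (u32 *)&count + 1 addresses bits 32-63. The result is equivalent to the explicit combination below (illustrative helper, not part of the file):

#include <stdint.h>

static inline uint64_t counter_from_dwords(uint32_t lo, uint32_t hi)
{
        /* assemble a 64-bit uncore counter read as two PCI config dwords */
        return ((uint64_t)hi << 32) | lo;
}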
-
-static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-
-       pci_write_config_dword(pdev, box_ctl, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
-{
-       u64 config;
-       unsigned msr;
-
-       msr = uncore_msr_box_ctl(box);
-       if (msr) {
-               rdmsrl(msr, config);
-               config |= SNBEP_PMON_BOX_CTL_FRZ;
-               wrmsrl(msr, config);
-       }
-}
-
-static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
-{
-       u64 config;
-       unsigned msr;
-
-       msr = uncore_msr_box_ctl(box);
-       if (msr) {
-               rdmsrl(msr, config);
-               config &= ~SNBEP_PMON_BOX_CTL_FRZ;
-               wrmsrl(msr, config);
-       }
-}
-
-static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE)
-               wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0));
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
-                                       struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       wrmsrl(hwc->config_base, hwc->config);
-}
-
-static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-
-       if (msr)
-               wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
-}
-
-static struct attribute *snbep_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid.attr,
-       &format_attr_filter_nid.attr,
-       &format_attr_filter_state.attr,
-       &format_attr_filter_opc.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_pcu_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge.attr,
-       &format_attr_filter_band0.attr,
-       &format_attr_filter_band1.attr,
-       &format_attr_filter_band2.attr,
-       &format_attr_filter_band3.attr,
-       NULL,
-};
-
-static struct attribute *snbep_uncore_qpi_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match_rds.attr,
-       &format_attr_match_rnid30.attr,
-       &format_attr_match_rnid4.attr,
-       &format_attr_match_dnid.attr,
-       &format_attr_match_mc.attr,
-       &format_attr_match_opc.attr,
-       &format_attr_match_vnw.attr,
-       &format_attr_match0.attr,
-       &format_attr_match1.attr,
-       &format_attr_mask_rds.attr,
-       &format_attr_mask_rnid30.attr,
-       &format_attr_mask_rnid4.attr,
-       &format_attr_mask_dnid.attr,
-       &format_attr_mask_mc.attr,
-       &format_attr_mask_opc.attr,
-       &format_attr_mask_vnw.attr,
-       &format_attr_mask0.attr,
-       &format_attr_mask1.attr,
-       NULL,
-};
-
-static struct uncore_event_desc snbep_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
-       { /* end: all zeroes */ },
-};
-
-static struct uncore_event_desc snbep_uncore_qpi_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
-       INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
-       INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x102,umask=0x08"),
-       INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x103,umask=0x04"),
-       { /* end: all zeroes */ },
-};
-
-static struct attribute_group snbep_uncore_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group snbep_uncore_qpi_format_group = {
-       .name = "format",
-       .attrs = snbep_uncore_qpi_formats_attr,
-};
-
-#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                   \
-       .disable_box    = snbep_uncore_msr_disable_box,         \
-       .enable_box     = snbep_uncore_msr_enable_box,          \
-       .disable_event  = snbep_uncore_msr_disable_event,       \
-       .enable_event   = snbep_uncore_msr_enable_event,        \
-       .read_counter   = uncore_msr_read_counter
-
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
-       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),                   \
-       .init_box       = snbep_uncore_msr_init_box             \
-
-static struct intel_uncore_ops snbep_uncore_msr_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT()                     \
-       .init_box       = snbep_uncore_pci_init_box,            \
-       .disable_box    = snbep_uncore_pci_disable_box,         \
-       .enable_box     = snbep_uncore_pci_enable_box,          \
-       .disable_event  = snbep_uncore_pci_disable_event,       \
-       .read_counter   = snbep_uncore_pci_read_counter
-
-static struct intel_uncore_ops snbep_uncore_pci_ops = {
-       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
-       .enable_event   = snbep_uncore_pci_enable_event,

-};
-
-static struct event_constraint snbep_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
-       UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
-       EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2a, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type snbep_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
-       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
-       .event_mask     = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops            = &snbep_uncore_msr_ops,
-       .format_group   = &snbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x2),
-       EVENT_EXTRA_END
-};
-
-static void snbep_cbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       int i;
-
-       if (uncore_box_is_fake(box))
-               return;
-
-       for (i = 0; i < 5; i++) {
-               if (reg1->alloc & (0x1 << i))
-                       atomic_sub(1 << (i * 6), &er->ref);
-       }
-       reg1->alloc = 0;
-}
-
-static struct event_constraint *
-__snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event,
-                           u64 (*cbox_filter_mask)(int fields))
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       int i, alloc = 0;
-       unsigned long flags;
-       u64 mask;
-
-       if (reg1->idx == EXTRA_REG_NONE)
-               return NULL;
-
-       raw_spin_lock_irqsave(&er->lock, flags);
-       for (i = 0; i < 5; i++) {
-               if (!(reg1->idx & (0x1 << i)))
-                       continue;
-               if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
-                       continue;
-
-               mask = cbox_filter_mask(0x1 << i);
-               if (!__BITS_VALUE(atomic_read(&er->ref), i, 6) ||
-                   !((reg1->config ^ er->config) & mask)) {
-                       atomic_add(1 << (i * 6), &er->ref);
-                       er->config &= ~mask;
-                       er->config |= reg1->config & mask;
-                       alloc |= (0x1 << i);
-               } else {
-                       break;
-               }
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-       if (i < 5)
-               goto fail;
-
-       if (!uncore_box_is_fake(box))
-               reg1->alloc |= alloc;
-
-       return NULL;
-fail:
-       for (; i >= 0; i--) {
-               if (alloc & (0x1 << i))
-                       atomic_sub(1 << (i * 6), &er->ref);
-       }
-       return &uncore_constraint_empty;
-}
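The shared-register bookkeeping above packs one reference count per C-box filter field into er->ref: field i owns a 6-bit slice at bit offset i*6, which is why acquiring a field does atomic_add(1 << (i * 6), &er->ref) and releasing it subtracts the same value. A sketch of the slice extraction that __BITS_VALUE() is used for here (illustrative helper, assuming the 6-bit-per-field layout shown above):

static inline int cbox_filter_refcount(int ref, int field)
{
        /* 6-bit use count of filter field 'field' packed into 'ref' */
        return (ref >> (field * 6)) & 0x3f;
}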
-
-static u64 snbep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x4)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= SNBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-
-       return mask;
-}
-
-static struct event_constraint *
-snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, snbep_cbox_filter_mask);
-}
-
-static int snbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = snbep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
-                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & snbep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_cbox_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_cbox_hw_config,
-       .get_constraint         = snbep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = snbep_uncore_cbox_constraints,
-       .ops                    = &snbep_uncore_cbox_ops,
-       .format_group           = &snbep_uncore_cbox_format_group,
-};
-
-static u64 snbep_pcu_alter_er(struct perf_event *event, int new_idx, bool modify)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       u64 config = reg1->config;
-
-       if (new_idx > reg1->idx)
-               config <<= 8 * (new_idx - reg1->idx);
-       else
-               config >>= 8 * (reg1->idx - new_idx);
-
-       if (modify) {
-               hwc->config += new_idx - reg1->idx;
-               reg1->config = config;
-               reg1->idx = new_idx;
-       }
-       return config;
-}
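snbep_pcu_alter_er() relies on the PCU filter holding one 8-bit band threshold per filter index (filter_band0..filter_band3 at config1 bits 0-7 through 24-31, per the format attributes earlier), so migrating an event from one band index to another only has to relocate its byte. A worked sketch of that relocation (illustrative only, not part of the file):

#include <stdint.h>

static inline uint64_t pcu_move_band(uint64_t config, int old_idx, int new_idx)
{
        /* e.g. a threshold in byte 0 re-homed to index 2 lands in bits 16-23 */
        return new_idx > old_idx ? config << (8 * (new_idx - old_idx))
                                 : config >> (8 * (old_idx - new_idx));
}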
-
-static struct event_constraint *
-snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-       unsigned long flags;
-       int idx = reg1->idx;
-       u64 mask, config1 = reg1->config;
-       bool ok = false;
-
-       if (reg1->idx == EXTRA_REG_NONE ||
-           (!uncore_box_is_fake(box) && reg1->alloc))
-               return NULL;
-again:
-       mask = 0xffULL << (idx * 8);
-       raw_spin_lock_irqsave(&er->lock, flags);
-       if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
-           !((config1 ^ er->config) & mask)) {
-               atomic_add(1 << (idx * 8), &er->ref);
-               er->config &= ~mask;
-               er->config |= config1 & mask;
-               ok = true;
-       }
-       raw_spin_unlock_irqrestore(&er->lock, flags);
-
-       if (!ok) {
-               idx = (idx + 1) % 4;
-               if (idx != reg1->idx) {
-                       config1 = snbep_pcu_alter_er(event, idx, false);
-                       goto again;
-               }
-               return &uncore_constraint_empty;
-       }
-
-       if (!uncore_box_is_fake(box)) {
-               if (idx != reg1->idx)
-                       snbep_pcu_alter_er(event, idx, true);
-               reg1->alloc = 1;
-       }
-       return NULL;
-}
-
-static void snbep_pcu_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct intel_uncore_extra_reg *er = &box->shared_regs[0];
-
-       if (uncore_box_is_fake(box) || !reg1->alloc)
-               return;
-
-       atomic_sub(1 << (reg1->idx * 8), &er->ref);
-       reg1->alloc = 0;
-}
-
-static int snbep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
-       if (ev_sel >= 0xb && ev_sel <= 0xe) {
-               reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
-               reg1->idx = ev_sel - 0xb;
-               reg1->config = event->attr.config1 & (0xff << (reg1->idx * 8));
-       }
-       return 0;
-}
-
-static struct intel_uncore_ops snbep_uncore_pcu_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type snbep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_pcu_ops,
-       .format_group           = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *snbep_msr_uncores[] = {
-       &snbep_uncore_ubox,
-       &snbep_uncore_cbox,
-       &snbep_uncore_pcu,
-       NULL,
-};
-
-void snbep_uncore_cpu_init(void)
-{
-       if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = snbep_msr_uncores;
-}
-
-enum {
-       SNBEP_PCI_QPI_PORT0_FILTER,
-       SNBEP_PCI_QPI_PORT1_FILTER,
-       HSWEP_PCI_PCU_3,
-};
-
-static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
-               reg1->idx = 0;
-               reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
-               reg1->config = event->attr.config1;
-               reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
-               reg2->config = event->attr.config2;
-       }
-       return 0;
-}
-
-static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-               struct pci_dev *filter_pdev = uncore_extra_pci_dev[box->phys_id][idx];
-               if (filter_pdev) {
-                       pci_write_config_dword(filter_pdev, reg1->reg,
-                                               (u32)reg1->config);
-                       pci_write_config_dword(filter_pdev, reg1->reg + 4,
-                                               (u32)(reg1->config >> 32));
-                       pci_write_config_dword(filter_pdev, reg2->reg,
-                                               (u32)reg2->config);
-                       pci_write_config_dword(filter_pdev, reg2->reg + 4,
-                                               (u32)(reg2->config >> 32));
-               }
-       }
-
-       pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops snbep_uncore_qpi_ops = {
-       SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
-       .enable_event           = snbep_qpi_enable_event,
-       .hw_config              = snbep_qpi_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-#define SNBEP_UNCORE_PCI_COMMON_INIT()                         \
-       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
-       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
-       .event_mask     = SNBEP_PMON_RAW_EVENT_MASK,            \
-       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
-       .ops            = &snbep_uncore_pci_ops,                \
-       .format_group   = &snbep_uncore_format_group
-
-static struct intel_uncore_type snbep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 4,
-       .num_boxes      = 4,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = snbep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .event_descs            = snbep_uncore_qpi_events,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-
-static struct intel_uncore_type snbep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type snbep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       SNBEP_PCI_UNCORE_HA,
-       SNBEP_PCI_UNCORE_IMC,
-       SNBEP_PCI_UNCORE_QPI,
-       SNBEP_PCI_UNCORE_R2PCIE,
-       SNBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *snbep_pci_uncores[] = {
-       [SNBEP_PCI_UNCORE_HA]           = &snbep_uncore_ha,
-       [SNBEP_PCI_UNCORE_IMC]          = &snbep_uncore_imc,
-       [SNBEP_PCI_UNCORE_QPI]          = &snbep_uncore_qpi,
-       [SNBEP_PCI_UNCORE_R2PCIE]       = &snbep_uncore_r2pcie,
-       [SNBEP_PCI_UNCORE_R3QPI]        = &snbep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id snbep_uncore_pci_ids[] = {
-       { /* Home Agent */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
-       },
-       { /* MC Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* QPI Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
-               .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver snbep_uncore_pci_driver = {
-       .name           = "snbep_uncore",
-       .id_table       = snbep_uncore_pci_ids,
-};
-
-/*
- * build pci bus to socket mapping
- */
-static int snbep_pci2phy_map_init(int devid)
-{
-       struct pci_dev *ubox_dev = NULL;
-       int i, bus, nodeid, segment;
-       struct pci2phy_map *map;
-       int err = 0;
-       u32 config = 0;
-
-       while (1) {
-               /* find the UBOX device */
-               ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, ubox_dev);
-               if (!ubox_dev)
-                       break;
-               bus = ubox_dev->bus->number;
-               /* get the Node ID of the local socket */
-               err = pci_read_config_dword(ubox_dev, 0x40, &config);
-               if (err)
-                       break;
-               nodeid = config;
-               /* get the Node ID mapping */
-               err = pci_read_config_dword(ubox_dev, 0x54, &config);
-               if (err)
-                       break;
-
-               segment = pci_domain_nr(ubox_dev->bus);
-               raw_spin_lock(&pci2phy_map_lock);
-               map = __find_pci2phy_map(segment);
-               if (!map) {
-                       raw_spin_unlock(&pci2phy_map_lock);
-                       err = -ENOMEM;
-                       break;
-               }
-
-               /*
-                * every three bits in the Node ID mapping register maps
-                * to a particular node.
-                */
-               for (i = 0; i < 8; i++) {
-                       if (nodeid == ((config >> (3 * i)) & 0x7)) {
-                               map->pbus_to_physid[bus] = i;
-                               break;
-                       }
-               }
-               raw_spin_unlock(&pci2phy_map_lock);
-       }
-
-       if (!err) {
-               /*
-                * For a PCI bus with no UBOX device, fall back to the
-                * mapping of the next bus that does have one.
-                */
-               raw_spin_lock(&pci2phy_map_lock);
-               list_for_each_entry(map, &pci2phy_map_head, list) {
-                       i = -1;
-                       for (bus = 255; bus >= 0; bus--) {
-                               if (map->pbus_to_physid[bus] >= 0)
-                                       i = map->pbus_to_physid[bus];
-                               else
-                                       map->pbus_to_physid[bus] = i;
-                       }
-               }
-               raw_spin_unlock(&pci2phy_map_lock);
-       }
-
-       pci_dev_put(ubox_dev);
-
-       return err ? pcibios_err_to_errno(err) : 0;
-}
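To make the Node ID decoding above concrete: the register read at offset 0x40 gives the local node id, and in the mapping register read at offset 0x54 each 3-bit group i holds the node id assigned to physical socket i, so the socket id recorded in pbus_to_physid[] is the index of the matching group. A standalone sketch of that lookup (illustrative only):

#include <stdint.h>

static inline int nodeid_to_physid(uint32_t mapping, int nodeid)
{
        int i;

        /* e.g. mapping 076543210 (octal, 3 bits per socket), nodeid 2 -> socket 2 */
        for (i = 0; i < 8; i++)
                if (((mapping >> (3 * i)) & 0x7) == nodeid)
                        return i;
        return -1;      /* no socket advertises this node id */
}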
-
-int snbep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x3ce0);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = snbep_pci_uncores;
-       uncore_pci_driver = &snbep_uncore_pci_driver;
-       return 0;
-}
-/* end of Sandy Bridge-EP uncore support */
-
-/* IvyTown uncore support */
-static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-       if (msr)
-               wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT);
-}
-
-static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-
-       pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
-}
-
-#define IVBEP_UNCORE_MSR_OPS_COMMON_INIT()                     \
-       .init_box       = ivbep_uncore_msr_init_box,            \
-       .disable_box    = snbep_uncore_msr_disable_box,         \
-       .enable_box     = snbep_uncore_msr_enable_box,          \
-       .disable_event  = snbep_uncore_msr_disable_event,       \
-       .enable_event   = snbep_uncore_msr_enable_event,        \
-       .read_counter   = uncore_msr_read_counter
-
-static struct intel_uncore_ops ivbep_uncore_msr_ops = {
-       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-};
-
-static struct intel_uncore_ops ivbep_uncore_pci_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = snbep_uncore_pci_disable_event,
-       .enable_event   = snbep_uncore_pci_enable_event,
-       .read_counter   = snbep_uncore_pci_read_counter,
-};
-
-#define IVBEP_UNCORE_PCI_COMMON_INIT()                         \
-       .perf_ctr       = SNBEP_PCI_PMON_CTR0,                  \
-       .event_ctl      = SNBEP_PCI_PMON_CTL0,                  \
-       .event_mask     = IVBEP_PMON_RAW_EVENT_MASK,            \
-       .box_ctl        = SNBEP_PCI_PMON_BOX_CTL,               \
-       .ops            = &ivbep_uncore_pci_ops,                        \
-       .format_group   = &ivbep_uncore_format_group
-
-static struct attribute *ivbep_uncore_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid.attr,
-       &format_attr_filter_link.attr,
-       &format_attr_filter_state2.attr,
-       &format_attr_filter_nid2.attr,
-       &format_attr_filter_opc2.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_c6.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_pcu_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge.attr,
-       &format_attr_filter_band0.attr,
-       &format_attr_filter_band1.attr,
-       &format_attr_filter_band2.attr,
-       &format_attr_filter_band3.attr,
-       NULL,
-};
-
-static struct attribute *ivbep_uncore_qpi_formats_attr[] = {
-       &format_attr_event_ext.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_match_rds.attr,
-       &format_attr_match_rnid30.attr,
-       &format_attr_match_rnid4.attr,
-       &format_attr_match_dnid.attr,
-       &format_attr_match_mc.attr,
-       &format_attr_match_opc.attr,
-       &format_attr_match_vnw.attr,
-       &format_attr_match0.attr,
-       &format_attr_match1.attr,
-       &format_attr_mask_rds.attr,
-       &format_attr_mask_rnid30.attr,
-       &format_attr_mask_rnid4.attr,
-       &format_attr_mask_dnid.attr,
-       &format_attr_mask_mc.attr,
-       &format_attr_mask_opc.attr,
-       &format_attr_mask_vnw.attr,
-       &format_attr_mask0.attr,
-       &format_attr_mask1.attr,
-       NULL,
-};
-
-static struct attribute_group ivbep_uncore_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_ubox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_cbox_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_pcu_formats_attr,
-};
-
-static struct attribute_group ivbep_uncore_qpi_format_group = {
-       .name = "format",
-       .attrs = ivbep_uncore_qpi_formats_attr,
-};
-
-static struct intel_uncore_type ivbep_uncore_ubox = {
-       .name           = "ubox",
-       .num_counters   = 2,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .fixed_ctr_bits = 48,
-       .perf_ctr       = SNBEP_U_MSR_PMON_CTR0,
-       .event_ctl      = SNBEP_U_MSR_PMON_CTL0,
-       .event_mask     = IVBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl      = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops            = &ivbep_uncore_msr_ops,
-       .format_group   = &ivbep_uncore_ubox_format_group,
-};
-
-static struct extra_reg ivbep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
-       EVENT_EXTRA_END
-};
-
-static u64 ivbep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_LINK;
-       if (fields & 0x4)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x10) {
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_NC;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_C6;
-               mask |= IVBEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
-       }
-
-       return mask;
-}
-
-static struct event_constraint *
-ivbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, ivbep_cbox_filter_mask);
-}
-
-static int ivbep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = ivbep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
-                       SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & ivbep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               u64 filter = uncore_shared_reg_config(box, 0);
-               wrmsrl(reg1->reg, filter & 0xffffffff);
-               wrmsrl(reg1->reg + 6, filter >> 32);
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops ivbep_uncore_cbox_ops = {
-       .init_box               = ivbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = ivbep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = ivbep_cbox_hw_config,
-       .get_constraint         = ivbep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 15,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = SNBEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = SNBEP_C0_MSR_PMON_CTR0,
-       .event_mask             = IVBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = SNBEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = snbep_uncore_cbox_constraints,
-       .ops                    = &ivbep_uncore_cbox_ops,
-       .format_group           = &ivbep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_pcu_ops = {
-       IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = snbep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = SNBEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_pcu_ops,
-       .format_group           = &ivbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *ivbep_msr_uncores[] = {
-       &ivbep_uncore_ubox,
-       &ivbep_uncore_cbox,
-       &ivbep_uncore_pcu,
-       NULL,
-};
-
-void ivbep_uncore_cpu_init(void)
-{
-       if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = ivbep_msr_uncores;
-}
-
-static struct intel_uncore_type ivbep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 4,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = snbep_uncore_imc_events,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-/* registers in IRP boxes are not properly aligned */
-static unsigned ivbep_uncore_irp_ctls[] = {0xd8, 0xdc, 0xe0, 0xe4};
-static unsigned ivbep_uncore_irp_ctrs[] = {0xa0, 0xb0, 0xb8, 0xc0};
-
-static void ivbep_uncore_irp_enable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx],
-                              hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static void ivbep_uncore_irp_disable_event(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       pci_write_config_dword(pdev, ivbep_uncore_irp_ctls[hwc->idx], hwc->config);
-}
-
-static u64 ivbep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
-       pci_read_config_dword(pdev, ivbep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
-       return count;
-}
-
-static struct intel_uncore_ops ivbep_uncore_irp_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = ivbep_uncore_irp_disable_event,
-       .enable_event   = ivbep_uncore_irp_enable_event,
-       .read_counter   = ivbep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type ivbep_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = IVBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &ivbep_uncore_irp_ops,
-       .format_group           = &ivbep_uncore_format_group,
-};
-
-static struct intel_uncore_ops ivbep_uncore_qpi_ops = {
-       .init_box       = ivbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = snbep_uncore_pci_disable_event,
-       .enable_event   = snbep_qpi_enable_event,
-       .read_counter   = snbep_uncore_pci_read_counter,
-       .hw_config      = snbep_qpi_hw_config,
-       .get_constraint = uncore_get_constraint,
-       .put_constraint = uncore_put_constraint,
-};
-
-static struct intel_uncore_type ivbep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = IVBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_qpi_ops,
-       .format_group           = &ivbep_uncore_qpi_format_group,
-};
-
-static struct intel_uncore_type ivbep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r2pcie_constraints,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type ivbep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 44,
-       .constraints    = snbep_uncore_r3qpi_constraints,
-       IVBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       IVBEP_PCI_UNCORE_HA,
-       IVBEP_PCI_UNCORE_IMC,
-       IVBEP_PCI_UNCORE_IRP,
-       IVBEP_PCI_UNCORE_QPI,
-       IVBEP_PCI_UNCORE_R2PCIE,
-       IVBEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *ivbep_pci_uncores[] = {
-       [IVBEP_PCI_UNCORE_HA]   = &ivbep_uncore_ha,
-       [IVBEP_PCI_UNCORE_IMC]  = &ivbep_uncore_imc,
-       [IVBEP_PCI_UNCORE_IRP]  = &ivbep_uncore_irp,
-       [IVBEP_PCI_UNCORE_QPI]  = &ivbep_uncore_qpi,
-       [IVBEP_PCI_UNCORE_R2PCIE]       = &ivbep_uncore_r2pcie,
-       [IVBEP_PCI_UNCORE_R3QPI]        = &ivbep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id ivbep_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 4 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 4 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe39),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(IVBEP_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver ivbep_uncore_pci_driver = {
-       .name           = "ivbep_uncore",
-       .id_table       = ivbep_uncore_pci_ids,
-};
-
-int ivbep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x0e1e);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = ivbep_pci_uncores;
-       uncore_pci_driver = &ivbep_uncore_pci_driver;
-       return 0;
-}
-/* end of IvyTown uncore support */
-
-/* KNL uncore support */
-static struct attribute *knl_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_ubox_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = KNL_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .ops                    = &snbep_uncore_msr_ops,
-       .format_group           = &knl_uncore_ubox_format_group,
-};
-
-static struct attribute *knl_uncore_cha_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_qor.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid4.attr,
-       &format_attr_filter_link3.attr,
-       &format_attr_filter_state4.attr,
-       &format_attr_filter_local.attr,
-       &format_attr_filter_all_op.attr,
-       &format_attr_filter_nnm.attr,
-       &format_attr_filter_opc3.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_cha_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_cha_formats_attr,
-};
-
-static struct event_constraint knl_uncore_cha_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg knl_uncore_cha_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x3d, 0xff, 0x2),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x4),
-       EVENT_EXTRA_END
-};
-
-static u64 knl_cha_filter_mask(int fields)
-{
-       u64 mask = 0;
-
-       if (fields & 0x1)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x4)
-               mask |= KNL_CHA_MSR_PMON_BOX_FILTER_OP;
-       return mask;
-}
-
-static struct event_constraint *
-knl_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, knl_cha_filter_mask);
-}
-
-static int knl_cha_hw_config(struct intel_uncore_box *box,
-                            struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = knl_uncore_cha_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
-                           KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & knl_cha_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
-                                   struct perf_event *event);
-
-static struct intel_uncore_ops knl_uncore_cha_ops = {
-       .init_box               = snbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = hswep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = knl_cha_hw_config,
-       .get_constraint         = knl_cha_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type knl_uncore_cha = {
-       .name                   = "cha",
-       .num_counters           = 4,
-       .num_boxes              = 38,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = KNL_CHA_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = KNL_CHA_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = knl_uncore_cha_constraints,
-       .ops                    = &knl_uncore_cha_ops,
-       .format_group           = &knl_uncore_cha_format_group,
-};
-
-static struct attribute *knl_uncore_pcu_formats_attr[] = {
-       &format_attr_event2.attr,
-       &format_attr_use_occ_ctr.attr,
-       &format_attr_occ_sel.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh6.attr,
-       &format_attr_occ_invert.attr,
-       &format_attr_occ_edge_det.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_pcu_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_pcu_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = KNL_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
-       .ops                    = &snbep_uncore_msr_ops,
-       .format_group           = &knl_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *knl_msr_uncores[] = {
-       &knl_uncore_ubox,
-       &knl_uncore_cha,
-       &knl_uncore_pcu,
-       NULL,
-};
-
-void knl_uncore_cpu_init(void)
-{
-       uncore_msr_uncores = knl_msr_uncores;
-}
-
-static void knl_uncore_imc_enable_box(struct intel_uncore_box *box)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       int box_ctl = uncore_pci_box_ctl(box);
-
-       pci_write_config_dword(pdev, box_ctl, 0);
-}
-
-static void knl_uncore_imc_enable_event(struct intel_uncore_box *box,
-                                       struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-
-       if ((event->attr.config & SNBEP_PMON_CTL_EV_SEL_MASK)
-                                                       == UNCORE_FIXED_EVENT)
-               pci_write_config_dword(pdev, hwc->config_base,
-                                      hwc->config | KNL_PMON_FIXED_CTL_EN);
-       else
-               pci_write_config_dword(pdev, hwc->config_base,
-                                      hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops knl_uncore_imc_ops = {
-       .init_box       = snbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = knl_uncore_imc_enable_box,
-       .read_counter   = snbep_uncore_pci_read_counter,
-       .enable_event   = knl_uncore_imc_enable_event,
-       .disable_event  = snbep_uncore_pci_disable_event,
-};
-
-static struct intel_uncore_type knl_uncore_imc_uclk = {
-       .name                   = "imc_uclk",
-       .num_counters           = 4,
-       .num_boxes              = 2,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
-       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
-       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_imc_dclk = {
-       .name                   = "imc",
-       .num_counters           = 4,
-       .num_boxes              = 6,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_MC0_CH0_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_MC0_CH0_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_MC0_CH0_MSR_PMON_FIXED_LOW,
-       .fixed_ctl              = KNL_MC0_CH0_MSR_PMON_FIXED_CTL,
-       .box_ctl                = KNL_MC0_CH0_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_uclk = {
-       .name                   = "edc_uclk",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_UCLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_UCLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_UCLK_MSR_PMON_UCLK_FIXED_LOW,
-       .fixed_ctl              = KNL_UCLK_MSR_PMON_UCLK_FIXED_CTL,
-       .box_ctl                = KNL_UCLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type knl_uncore_edc_eclk = {
-       .name                   = "edc_eclk",
-       .num_counters           = 4,
-       .num_boxes              = 8,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = KNL_EDC0_ECLK_MSR_PMON_CTR0_LOW,
-       .event_ctl              = KNL_EDC0_ECLK_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_LOW,
-       .fixed_ctl              = KNL_EDC0_ECLK_MSR_PMON_ECLK_FIXED_CTL,
-       .box_ctl                = KNL_EDC0_ECLK_MSR_PMON_BOX_CTL,
-       .ops                    = &knl_uncore_imc_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct event_constraint knl_uncore_m2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type knl_uncore_m2pcie = {
-       .name           = "m2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = knl_uncore_m2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct attribute *knl_uncore_irp_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_qor.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group knl_uncore_irp_format_group = {
-       .name = "format",
-       .attrs = knl_uncore_irp_formats_attr,
-};
-
-static struct intel_uncore_type knl_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = KNL_IRP_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = KNL_IRP_PCI_PMON_BOX_CTL,
-       .ops                    = &snbep_uncore_pci_ops,
-       .format_group           = &knl_uncore_irp_format_group,
-};
-
-enum {
-       KNL_PCI_UNCORE_MC_UCLK,
-       KNL_PCI_UNCORE_MC_DCLK,
-       KNL_PCI_UNCORE_EDC_UCLK,
-       KNL_PCI_UNCORE_EDC_ECLK,
-       KNL_PCI_UNCORE_M2PCIE,
-       KNL_PCI_UNCORE_IRP,
-};
-
-static struct intel_uncore_type *knl_pci_uncores[] = {
-       [KNL_PCI_UNCORE_MC_UCLK]        = &knl_uncore_imc_uclk,
-       [KNL_PCI_UNCORE_MC_DCLK]        = &knl_uncore_imc_dclk,
-       [KNL_PCI_UNCORE_EDC_UCLK]       = &knl_uncore_edc_uclk,
-       [KNL_PCI_UNCORE_EDC_ECLK]       = &knl_uncore_edc_eclk,
-       [KNL_PCI_UNCORE_M2PCIE]         = &knl_uncore_m2pcie,
-       [KNL_PCI_UNCORE_IRP]            = &knl_uncore_irp,
-       NULL,
-};
-
-/*
- * KNL uses a common PCI device ID for multiple instances of an Uncore PMU
- * device type. Prior to KNL, each instance of a PMU device type had a unique
- * device ID.
- *
- *     PCI Device ID   Uncore PMU Devices
- *     ----------------------------------
- *     0x7841          MC0 UClk, MC1 UClk
- *     0x7843          MC0 DClk CH 0, MC0 DClk CH 1, MC0 DClk CH 2,
- *                     MC1 DClk CH 0, MC1 DClk CH 1, MC1 DClk CH 2
- *     0x7833          EDC0 UClk, EDC1 UClk, EDC2 UClk, EDC3 UClk,
- *                     EDC4 UClk, EDC5 UClk, EDC6 UClk, EDC7 UClk
- *     0x7835          EDC0 EClk, EDC1 EClk, EDC2 EClk, EDC3 EClk,
- *                     EDC4 EClk, EDC5 EClk, EDC6 EClk, EDC7 EClk
- *     0x7817          M2PCIe
- *     0x7814          IRP
- */
-
-static const struct pci_device_id knl_uncore_pci_ids[] = {
-       { /* MC UClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0),
-       },
-       { /* MC DClk Channel */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0),
-       },
-       { /* EDC UClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0),
-       },
-       { /* EDC EClk */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0),
-       },
-       { /* M2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_M2PCIE, 0),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7814),
-               .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_IRP, 0),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver knl_uncore_pci_driver = {
-       .name           = "knl_uncore",
-       .id_table       = knl_uncore_pci_ids,
-};
-
-int knl_uncore_pci_init(void)
-{
-       int ret;
-
-       /* All KNL PCI based PMON units are on the same PCI bus except IRP */
-       ret = snb_pci2phy_map_init(0x7814); /* IRP */
-       if (ret)
-               return ret;
-       ret = snb_pci2phy_map_init(0x7817); /* M2PCIe */
-       if (ret)
-               return ret;
-       uncore_pci_uncores = knl_pci_uncores;
-       uncore_pci_driver = &knl_uncore_pci_driver;
-       return 0;
-}
-
-/* end of KNL uncore support */
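
Aside (editorial, not part of the diff): the comment above the knl_uncore_pci_ids[] table explains why KNL can no longer rely on one PCI device ID per PMU instance. What still ties a matched PCI device back to an entry in knl_pci_uncores[] is the .driver_data value built with UNCORE_PCI_DEV_DATA(type, idx). A minimal sketch of that packing, under the assumption that it is the usual type-in-the-high-byte / instance-in-the-low-byte layout (the macro itself is defined outside this hunk):

/*
 * Editorial sketch only, not taken from the kernel tree: assumes
 * driver_data is packed as (type << 8) | instance, which is how the
 * UNCORE_PCI_DEV_DATA() users in this hunk appear to treat it.
 */
static inline unsigned int pack_uncore_dev_data(unsigned int type, unsigned int idx)
{
	return (type << 8) | idx;	/* e.g. (KNL_PCI_UNCORE_MC_DCLK << 8) | 0 */
}

static inline unsigned int uncore_dev_type(unsigned int data)
{
	return (data >> 8) & 0xff;	/* index into knl_pci_uncores[] */
}

static inline unsigned int uncore_dev_idx(unsigned int data)
{
	return data & 0xff;		/* which instance of that type */
}
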
-
-/* Haswell-EP uncore support */
-static struct attribute *hswep_uncore_ubox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh5.attr,
-       &format_attr_filter_tid2.attr,
-       &format_attr_filter_cid.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_ubox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_ubox_formats_attr,
-};
-
-static int hswep_ubox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       reg1->reg = HSWEP_U_MSR_PMON_FILTER;
-       reg1->config = event->attr.config1 & HSWEP_U_MSR_PMON_BOX_FILTER_MASK;
-       reg1->idx = 0;
-       return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_ubox_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = hswep_ubox_hw_config,
-       .get_constraint         = uncore_get_constraint,
-       .put_constraint         = uncore_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 44,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &hswep_uncore_ubox_ops,
-       .format_group           = &hswep_uncore_ubox_format_group,
-};
-
-static struct attribute *hswep_uncore_cbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_thresh8.attr,
-       &format_attr_filter_tid3.attr,
-       &format_attr_filter_link2.attr,
-       &format_attr_filter_state3.attr,
-       &format_attr_filter_nid2.attr,
-       &format_attr_filter_opc2.attr,
-       &format_attr_filter_nc.attr,
-       &format_attr_filter_c6.attr,
-       &format_attr_filter_isoc.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_cbox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_cbox_formats_attr,
-};
-
-static struct event_constraint hswep_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct extra_reg hswep_uncore_cbox_extra_regs[] = {
-       SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
-                                 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x4),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4037, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4028, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4032, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4029, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4033, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x402A, 0x40ff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x12),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8135, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x8),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8336, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x8136, 0xffff, 0x10),
-       SNBEP_CBO_EVENT_EXTRA_REG(0x5036, 0xffff, 0x8),
-       EVENT_EXTRA_END
-};
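
Aside (editorial, not part of the diff): in hswep_uncore_cbox_extra_regs[] the first argument packs the umask into the high byte and the event select into the low byte (0x0334 is event 0x34 with umask 0x03), the second is the mask it is compared under in hswep_cbox_hw_config() further down, and the third is the bitmap of filter fields that hswep_cbox_filter_mask() translates into HSWEP_CB0_MSR_PMON_BOX_FILTER_* bits (0x1 selects TID, 0x2 LINK, 0x4 STATE, 0x8 NID, 0x10 the OPC/NC/C6/ISOC group).
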
-
-static u64 hswep_cbox_filter_mask(int fields)
-{
-       u64 mask = 0;
-       if (fields & 0x1)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_TID;
-       if (fields & 0x2)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_LINK;
-       if (fields & 0x4)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_STATE;
-       if (fields & 0x8)
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NID;
-       if (fields & 0x10) {
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_OPC;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_NC;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_C6;
-               mask |= HSWEP_CB0_MSR_PMON_BOX_FILTER_ISOC;
-       }
-       return mask;
-}
-
-static struct event_constraint *
-hswep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
-{
-       return __snbep_cbox_get_constraint(box, event, hswep_cbox_filter_mask);
-}
-
-static int hswep_cbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
-       struct extra_reg *er;
-       int idx = 0;
-
-       for (er = hswep_uncore_cbox_extra_regs; er->msr; er++) {
-               if (er->event != (event->hw.config & er->config_mask))
-                       continue;
-               idx |= er->idx;
-       }
-
-       if (idx) {
-               reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
-                           HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
-               reg1->config = event->attr.config1 & hswep_cbox_filter_mask(idx);
-               reg1->idx = idx;
-       }
-       return 0;
-}
-
-static void hswep_cbox_enable_event(struct intel_uncore_box *box,
-                                 struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-
-       if (reg1->idx != EXTRA_REG_NONE) {
-               u64 filter = uncore_shared_reg_config(box, 0);
-               wrmsrl(reg1->reg, filter & 0xffffffff);
-               wrmsrl(reg1->reg + 1, filter >> 32);
-       }
-
-       wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
-}
-
-static struct intel_uncore_ops hswep_uncore_cbox_ops = {
-       .init_box               = snbep_uncore_msr_init_box,
-       .disable_box            = snbep_uncore_msr_disable_box,
-       .enable_box             = snbep_uncore_msr_enable_box,
-       .disable_event          = snbep_uncore_msr_disable_event,
-       .enable_event           = hswep_cbox_enable_event,
-       .read_counter           = uncore_msr_read_counter,
-       .hw_config              = hswep_cbox_hw_config,
-       .get_constraint         = hswep_cbox_get_constraint,
-       .put_constraint         = snbep_cbox_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 18,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = hswep_uncore_cbox_constraints,
-       .ops                    = &hswep_uncore_cbox_ops,
-       .format_group           = &hswep_uncore_cbox_format_group,
-};
-
-/*
- * Write SBOX Initialization register bit by bit to avoid spurious #GPs
- */
-static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
-{
-       unsigned msr = uncore_msr_box_ctl(box);
-
-       if (msr) {
-               u64 init = SNBEP_PMON_BOX_CTL_INT;
-               u64 flags = 0;
-               int i;
-
-               for_each_set_bit(i, (unsigned long *)&init, 64) {
-                       flags |= (1ULL << i);
-                       wrmsrl(msr, flags);
-               }
-       }
-}
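
Aside (editorial, not part of the diff): the loop above walks the set bits of SNBEP_PMON_BOX_CTL_INT from lowest to highest while accumulating them in flags, so the SBOX control MSR sees a sequence of writes that each add exactly one more bit rather than a single write of the full init value; per the comment, that is what avoids the spurious #GP faults.
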
-
-static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
-       __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .init_box               = hswep_uncore_sbox_msr_init_box
-};
-
-static struct attribute *hswep_uncore_sbox_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_tid_en.attr,
-       &format_attr_inv.attr,
-       &format_attr_thresh8.attr,
-       NULL,
-};
-
-static struct attribute_group hswep_uncore_sbox_format_group = {
-       .name = "format",
-       .attrs = hswep_uncore_sbox_formats_attr,
-};
-
-static struct intel_uncore_type hswep_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 4,
-       .perf_ctr_bits          = 44,
-       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
-       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
-       .ops                    = &hswep_uncore_sbox_msr_ops,
-       .format_group           = &hswep_uncore_sbox_format_group,
-};
-
-static int hswep_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
-       int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK;
-
-       if (ev_sel >= 0xb && ev_sel <= 0xe) {
-               reg1->reg = HSWEP_PCU_MSR_PMON_BOX_FILTER;
-               reg1->idx = ev_sel - 0xb;
-               reg1->config = event->attr.config1 & (0xff << reg1->idx);
-       }
-       return 0;
-}
-
-static struct intel_uncore_ops hswep_uncore_pcu_ops = {
-       SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
-       .hw_config              = hswep_pcu_hw_config,
-       .get_constraint         = snbep_pcu_get_constraint,
-       .put_constraint         = snbep_pcu_put_constraint,
-};
-
-static struct intel_uncore_type hswep_uncore_pcu = {
-       .name                   = "pcu",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = HSWEP_PCU_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_PCU_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_PCU_MSR_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &hswep_uncore_pcu_ops,
-       .format_group           = &snbep_uncore_pcu_format_group,
-};
-
-static struct intel_uncore_type *hswep_msr_uncores[] = {
-       &hswep_uncore_ubox,
-       &hswep_uncore_cbox,
-       &hswep_uncore_sbox,
-       &hswep_uncore_pcu,
-       NULL,
-};
-
-void hswep_uncore_cpu_init(void)
-{
-       if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-
-       /* Detect 6-8 core systems with only two SBOXes */
-       if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
-               u32 capid4;
-
-               pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
-                                     0x94, &capid4);
-               if (((capid4 >> 6) & 0x3) == 0)
-                       hswep_uncore_sbox.num_boxes = 2;
-       }
-
-       uncore_msr_uncores = hswep_msr_uncores;
-}
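
Aside (editorial, not part of the diff): CAPID4 is read through the PCU.3 PCI function that the UNCORE_EXTRA_PCI_DEV entry at the end of hswep_uncore_pci_ids[] stashes in uncore_extra_pci_dev[]; bits 7:6 appear to encode the die configuration, and a value of 0 identifies the smaller 6-8 core parts that physically carry only two SBOXes.
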
-
-static struct intel_uncore_type hswep_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 5,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct uncore_event_desc hswep_uncore_imc_events[] = {
-       INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0x00,umask=0x00"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
-       INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
-       { /* end: all zeroes */ },
-};
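
Aside (editorial, not part of the diff): the cas_count_read/cas_count_write scale of 6.103515625e-5 is 64 / 2^20; each CAS counter increment corresponds to one 64-byte cache-line transfer, so the scale converts the raw count straight into the MiB advertised by the matching .unit attribute.
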
-
-static struct intel_uncore_type hswep_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 5,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = hswep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
-
-static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
-{
-       struct pci_dev *pdev = box->pci_dev;
-       struct hw_perf_event *hwc = &event->hw;
-       u64 count = 0;
-
-       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
-       pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
-
-       return count;
-}
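
Aside (editorial, not part of the diff): the Haswell-EP IRP counters live in PCI config space rather than in MSRs, so the 64-bit value is assembled from two 32-bit config reads, the low half at hswep_uncore_irp_ctrs[idx] and the high half 4 bytes above it.
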
-
-static struct intel_uncore_ops hswep_uncore_irp_ops = {
-       .init_box       = snbep_uncore_pci_init_box,
-       .disable_box    = snbep_uncore_pci_disable_box,
-       .enable_box     = snbep_uncore_pci_enable_box,
-       .disable_event  = ivbep_uncore_irp_disable_event,
-       .enable_event   = ivbep_uncore_irp_enable_event,
-       .read_counter   = hswep_uncore_irp_read_counter,
-};
-
-static struct intel_uncore_type hswep_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &hswep_uncore_irp_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type hswep_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 5,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint hswep_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x24, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x27, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2a, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x2b, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = hswep_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type hswep_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 4,
-       .num_boxes      = 3,
-       .perf_ctr_bits  = 44,
-       .constraints    = hswep_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       HSWEP_PCI_UNCORE_HA,
-       HSWEP_PCI_UNCORE_IMC,
-       HSWEP_PCI_UNCORE_IRP,
-       HSWEP_PCI_UNCORE_QPI,
-       HSWEP_PCI_UNCORE_R2PCIE,
-       HSWEP_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *hswep_pci_uncores[] = {
-       [HSWEP_PCI_UNCORE_HA]   = &hswep_uncore_ha,
-       [HSWEP_PCI_UNCORE_IMC]  = &hswep_uncore_imc,
-       [HSWEP_PCI_UNCORE_IRP]  = &hswep_uncore_irp,
-       [HSWEP_PCI_UNCORE_QPI]  = &hswep_uncore_qpi,
-       [HSWEP_PCI_UNCORE_R2PCIE]       = &hswep_uncore_r2pcie,
-       [HSWEP_PCI_UNCORE_R3QPI]        = &hswep_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id hswep_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f30),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f38),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd0),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd1),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd4),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fd5),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f39),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f32),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f33),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f34),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f36),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f37),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(HSWEP_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT0_FILTER),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2f96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  SNBEP_PCI_QPI_PORT1_FILTER),
-       },
-       { /* PCU.3 (for Capability registers) */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
-                                                  HSWEP_PCI_PCU_3),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver hswep_uncore_pci_driver = {
-       .name           = "hswep_uncore",
-       .id_table       = hswep_uncore_pci_ids,
-};
-
-int hswep_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x2f1e);
-       if (ret)
-               return ret;
-       uncore_pci_uncores = hswep_pci_uncores;
-       uncore_pci_driver = &hswep_uncore_pci_driver;
-       return 0;
-}
-/* end of Haswell-EP uncore support */
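
Aside (editorial, not part of the diff): each intel_uncore_type above is registered as its own perf PMU, so on a Haswell-EP system they surface under /sys/bus/event_source/devices/ with names along the lines of uncore_cbox_0..N, uncore_imc_0..7 and uncore_qpi_0..2 (single-instance types drop the index), and each attribute_group named "format" becomes the format/ directory that tells perf how to encode event, umask, the filter_* fields and so on. An invocation such as perf stat -a -e uncore_imc_0/cas_count_read/ is the intended consumer of the event_descs defined above (illustrative; exact names depend on kernel version and hardware).
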
-
-/* BDX uncore support */
-
-static struct intel_uncore_type bdx_uncore_ubox = {
-       .name                   = "ubox",
-       .num_counters           = 2,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .fixed_ctr_bits         = 48,
-       .perf_ctr               = HSWEP_U_MSR_PMON_CTR0,
-       .event_ctl              = HSWEP_U_MSR_PMON_CTL0,
-       .event_mask             = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
-       .fixed_ctr              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
-       .fixed_ctl              = HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &ivbep_uncore_msr_ops,
-       .format_group           = &ivbep_uncore_ubox_format_group,
-};
-
-static struct event_constraint bdx_uncore_cbox_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x3e, 0x1),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_cbox = {
-       .name                   = "cbox",
-       .num_counters           = 4,
-       .num_boxes              = 24,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_C0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_C0_MSR_PMON_CTR0,
-       .event_mask             = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_C0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_CBO_MSR_OFFSET,
-       .num_shared_regs        = 1,
-       .constraints            = bdx_uncore_cbox_constraints,
-       .ops                    = &hswep_uncore_cbox_ops,
-       .format_group           = &hswep_uncore_cbox_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_sbox = {
-       .name                   = "sbox",
-       .num_counters           = 4,
-       .num_boxes              = 4,
-       .perf_ctr_bits          = 48,
-       .event_ctl              = HSWEP_S0_MSR_PMON_CTL0,
-       .perf_ctr               = HSWEP_S0_MSR_PMON_CTR0,
-       .event_mask             = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
-       .box_ctl                = HSWEP_S0_MSR_PMON_BOX_CTL,
-       .msr_offset             = HSWEP_SBOX_MSR_OFFSET,
-       .ops                    = &hswep_uncore_sbox_msr_ops,
-       .format_group           = &hswep_uncore_sbox_format_group,
-};
-
-static struct intel_uncore_type *bdx_msr_uncores[] = {
-       &bdx_uncore_ubox,
-       &bdx_uncore_cbox,
-       &bdx_uncore_sbox,
-       &hswep_uncore_pcu,
-       NULL,
-};
-
-void bdx_uncore_cpu_init(void)
-{
-       if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
-               bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
-       uncore_msr_uncores = bdx_msr_uncores;
-}
-
-static struct intel_uncore_type bdx_uncore_ha = {
-       .name           = "ha",
-       .num_counters   = 4,
-       .num_boxes      = 2,
-       .perf_ctr_bits  = 48,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_imc = {
-       .name           = "imc",
-       .num_counters   = 5,
-       .num_boxes      = 8,
-       .perf_ctr_bits  = 48,
-       .fixed_ctr_bits = 48,
-       .fixed_ctr      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
-       .fixed_ctl      = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
-       .event_descs    = hswep_uncore_imc_events,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct intel_uncore_type bdx_uncore_irp = {
-       .name                   = "irp",
-       .num_counters           = 4,
-       .num_boxes              = 1,
-       .perf_ctr_bits          = 48,
-       .event_mask             = SNBEP_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .ops                    = &hswep_uncore_irp_ops,
-       .format_group           = &snbep_uncore_format_group,
-};
-
-static struct intel_uncore_type bdx_uncore_qpi = {
-       .name                   = "qpi",
-       .num_counters           = 4,
-       .num_boxes              = 3,
-       .perf_ctr_bits          = 48,
-       .perf_ctr               = SNBEP_PCI_PMON_CTR0,
-       .event_ctl              = SNBEP_PCI_PMON_CTL0,
-       .event_mask             = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
-       .box_ctl                = SNBEP_PCI_PMON_BOX_CTL,
-       .num_shared_regs        = 1,
-       .ops                    = &snbep_uncore_qpi_ops,
-       .format_group           = &snbep_uncore_qpi_format_group,
-};
-
-static struct event_constraint bdx_uncore_r2pcie_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r2pcie = {
-       .name           = "r2pcie",
-       .num_counters   = 4,
-       .num_boxes      = 1,
-       .perf_ctr_bits  = 48,
-       .constraints    = bdx_uncore_r2pcie_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-static struct event_constraint bdx_uncore_r3qpi_constraints[] = {
-       UNCORE_EVENT_CONSTRAINT(0x01, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x07, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x08, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x09, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0a, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x0e, 0x7),
-       UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
-       UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x15, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x1f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x28, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x29, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2c, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2e, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x2f, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
-       UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
-       EVENT_CONSTRAINT_END
-};
-
-static struct intel_uncore_type bdx_uncore_r3qpi = {
-       .name           = "r3qpi",
-       .num_counters   = 3,
-       .num_boxes      = 3,
-       .perf_ctr_bits  = 48,
-       .constraints    = bdx_uncore_r3qpi_constraints,
-       SNBEP_UNCORE_PCI_COMMON_INIT(),
-};
-
-enum {
-       BDX_PCI_UNCORE_HA,
-       BDX_PCI_UNCORE_IMC,
-       BDX_PCI_UNCORE_IRP,
-       BDX_PCI_UNCORE_QPI,
-       BDX_PCI_UNCORE_R2PCIE,
-       BDX_PCI_UNCORE_R3QPI,
-};
-
-static struct intel_uncore_type *bdx_pci_uncores[] = {
-       [BDX_PCI_UNCORE_HA]     = &bdx_uncore_ha,
-       [BDX_PCI_UNCORE_IMC]    = &bdx_uncore_imc,
-       [BDX_PCI_UNCORE_IRP]    = &bdx_uncore_irp,
-       [BDX_PCI_UNCORE_QPI]    = &bdx_uncore_qpi,
-       [BDX_PCI_UNCORE_R2PCIE] = &bdx_uncore_r2pcie,
-       [BDX_PCI_UNCORE_R3QPI]  = &bdx_uncore_r3qpi,
-       NULL,
-};
-
-static const struct pci_device_id bdx_uncore_pci_ids[] = {
-       { /* Home Agent 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0),
-       },
-       { /* Home Agent 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f38),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 1),
-       },
-       { /* MC0 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb0),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 0),
-       },
-       { /* MC0 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb1),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 1),
-       },
-       { /* MC0 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb4),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 2),
-       },
-       { /* MC0 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fb5),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 3),
-       },
-       { /* MC1 Channel 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd0),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 4),
-       },
-       { /* MC1 Channel 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd1),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 5),
-       },
-       { /* MC1 Channel 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd4),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 6),
-       },
-       { /* MC1 Channel 3 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fd5),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IMC, 7),
-       },
-       { /* IRP */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f39),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_IRP, 0),
-       },
-       { /* QPI0 Port 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f32),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 0),
-       },
-       { /* QPI0 Port 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f33),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 1),
-       },
-       { /* QPI1 Port 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3a),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_QPI, 2),
-       },
-       { /* R2PCIe */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f34),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R2PCIE, 0),
-       },
-       { /* R3QPI0 Link 0 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f36),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 0),
-       },
-       { /* R3QPI0 Link 1 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f37),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 1),
-       },
-       { /* R3QPI1 Link 2 */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f3e),
-               .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_R3QPI, 2),
-       },
-       { /* QPI Port 0 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
-       },
-       { /* QPI Port 1 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
-       },
-       { /* QPI Port 2 filter  */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
-               .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
-       },
-       { /* end: all zeroes */ }
-};
-
-static struct pci_driver bdx_uncore_pci_driver = {
-       .name           = "bdx_uncore",
-       .id_table       = bdx_uncore_pci_ids,
-};
-
-int bdx_uncore_pci_init(void)
-{
-       int ret = snbep_pci2phy_map_init(0x6f1e);
-
-       if (ret)
-               return ret;
-       uncore_pci_uncores = bdx_pci_uncores;
-       uncore_pci_driver = &bdx_uncore_pci_driver;
-       return 0;
-}
-
-/* end of BDX uncore support */
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
deleted file mode 100644
index 5b0c232..0000000
+++ /dev/null
@@ -1,319 +0,0 @@
-/* Driver for Intel Xeon Phi "Knights Corner" PMU */
-
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include <asm/hardirq.h>
-
-#include "perf_event.h"
-
-static const u64 knc_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]           = 0x002a,
-  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x0016,
-  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0028,
-  [PERF_COUNT_HW_CACHE_MISSES]         = 0x0029,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x0012,
-  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x002b,
-};
-
-static const u64 __initconst knc_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               /* On Xeon Phi event "0" is a valid DATA_READ          */
-               /*   (L1 Data Cache Reads) Instruction.                */
-               /* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
-               /* bit will always be set in x86_pmu_hw_config().      */
-               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
-                                               /* DATA_READ           */
-               [ C(RESULT_MISS)   ] = 0x0003,  /* DATA_READ_MISS      */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE          */
-               [ C(RESULT_MISS)   ] = 0x0004,  /* DATA_WRITE_MISS     */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0011,  /* L1_DATA_PF1         */
-               [ C(RESULT_MISS)   ] = 0x001c,  /* L1_DATA_PF1_MISS    */
-       },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ          */
-               [ C(RESULT_MISS)   ] = 0x000e,  /* CODE_CACHE_MISS    */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x10cb,  /* L2_READ_MISS */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x10cc,  /* L2_WRITE_HIT */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x10fc,  /* L2_DATA_PF2      */
-               [ C(RESULT_MISS)   ] = 0x10fe,  /* L2_DATA_PF2_MISS */
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
-                                               /* DATA_READ */
-                                               /* see note on L1 OP_READ */
-               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0001,  /* DATA_WRITE */
-               [ C(RESULT_MISS)   ] = 0x0002,  /* DATA_PAGE_WALK */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = 0x0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x000c,  /* CODE_READ */
-               [ C(RESULT_MISS)   ] = 0x000d,  /* CODE_PAGE_WALK */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0012,  /* BRANCHES */
-               [ C(RESULT_MISS)   ] = 0x002b,  /* BRANCHES_MISPREDICTED */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-
-static u64 knc_pmu_event_map(int hw_event)
-{
-       return knc_perfmon_event_map[hw_event];
-}
-
-static struct event_constraint knc_event_constraints[] =
-{
-       INTEL_EVENT_CONSTRAINT(0xc3, 0x1),      /* HWP_L2HIT */
-       INTEL_EVENT_CONSTRAINT(0xc4, 0x1),      /* HWP_L2MISS */
-       INTEL_EVENT_CONSTRAINT(0xc8, 0x1),      /* L2_READ_HIT_E */
-       INTEL_EVENT_CONSTRAINT(0xc9, 0x1),      /* L2_READ_HIT_M */
-       INTEL_EVENT_CONSTRAINT(0xca, 0x1),      /* L2_READ_HIT_S */
-       INTEL_EVENT_CONSTRAINT(0xcb, 0x1),      /* L2_READ_MISS */
-       INTEL_EVENT_CONSTRAINT(0xcc, 0x1),      /* L2_WRITE_HIT */
-       INTEL_EVENT_CONSTRAINT(0xce, 0x1),      /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
-       INTEL_EVENT_CONSTRAINT(0xcf, 0x1),      /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
-       INTEL_EVENT_CONSTRAINT(0xd7, 0x1),      /* L2_VICTIM_REQ_WITH_DATA */
-       INTEL_EVENT_CONSTRAINT(0xe3, 0x1),      /* SNP_HITM_BUNIT */
-       INTEL_EVENT_CONSTRAINT(0xe6, 0x1),      /* SNP_HIT_L2 */
-       INTEL_EVENT_CONSTRAINT(0xe7, 0x1),      /* SNP_HITM_L2 */
-       INTEL_EVENT_CONSTRAINT(0xf1, 0x1),      /* L2_DATA_READ_MISS_CACHE_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf2, 0x1),      /* L2_DATA_WRITE_MISS_CACHE_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf6, 0x1),      /* L2_DATA_READ_MISS_MEM_FILL */
-       INTEL_EVENT_CONSTRAINT(0xf7, 0x1),      /* L2_DATA_WRITE_MISS_MEM_FILL */
-       INTEL_EVENT_CONSTRAINT(0xfc, 0x1),      /* L2_DATA_PF2 */
-       INTEL_EVENT_CONSTRAINT(0xfd, 0x1),      /* L2_DATA_PF2_DROP */
-       INTEL_EVENT_CONSTRAINT(0xfe, 0x1),      /* L2_DATA_PF2_MISS */
-       INTEL_EVENT_CONSTRAINT(0xff, 0x1),      /* L2_DATA_HIT_INFLIGHT_PF2 */
-       EVENT_CONSTRAINT_END
-};
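
Aside (editorial, not part of the diff): every entry in knc_event_constraints[] carries a counter mask of 0x1, i.e. these L2 and snoop events can only be scheduled on the first of the two general-purpose KNC counters.
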
-
-#define MSR_KNC_IA32_PERF_GLOBAL_STATUS                0x0000002d
-#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL   0x0000002e
-#define MSR_KNC_IA32_PERF_GLOBAL_CTRL          0x0000002f
-
-#define KNC_ENABLE_COUNTER0                    0x00000001
-#define KNC_ENABLE_COUNTER1                    0x00000002
-
-static void knc_pmu_disable_all(void)
-{
-       u64 val;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-       val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static void knc_pmu_enable_all(int added)
-{
-       u64 val;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-       val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
-}
-
-static inline void
-knc_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-
-       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static void knc_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
-       (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
-}
-
-static inline u64 knc_pmu_get_status(void)
-{
-       u64 status;
-
-       rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
-
-       return status;
-}
-
-static inline void knc_pmu_ack_status(u64 ack)
-{
-       wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
-}
-
-static int knc_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       int handled = 0;
-       int bit, loops;
-       u64 status;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       knc_pmu_disable_all();
-
-       status = knc_pmu_get_status();
-       if (!status) {
-               knc_pmu_enable_all(0);
-               return handled;
-       }
-
-       loops = 0;
-again:
-       knc_pmu_ack_status(status);
-       if (++loops > 100) {
-               WARN_ONCE(1, "perf: irq loop stuck!\n");
-               perf_event_print_debug();
-               goto done;
-       }
-
-       inc_irq_stat(apic_perf_irqs);
-
-       for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-               struct perf_event *event = cpuc->events[bit];
-
-               handled++;
-
-               if (!test_bit(bit, cpuc->active_mask))
-                       continue;
-
-               if (!intel_pmu_save_and_restart(event))
-                       continue;
-
-               perf_sample_data_init(&data, 0, event->hw.last_period);
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       /*
-        * Repeat if there is more work to be done:
-        */
-       status = knc_pmu_get_status();
-       if (status)
-               goto again;
-
-done:
-       knc_pmu_enable_all(0);
-
-       return handled;
-}
-
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *intel_knc_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-static const struct x86_pmu knc_pmu __initconst = {
-       .name                   = "knc",
-       .handle_irq             = knc_pmu_handle_irq,
-       .disable_all            = knc_pmu_disable_all,
-       .enable_all             = knc_pmu_enable_all,
-       .enable                 = knc_pmu_enable_event,
-       .disable                = knc_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_KNC_EVNTSEL0,
-       .perfctr                = MSR_KNC_PERFCTR0,
-       .event_map              = knc_pmu_event_map,
-       .max_events             = ARRAY_SIZE(knc_perfmon_event_map),
-       .apic                   = 1,
-       .max_period             = (1ULL << 39) - 1,
-       .version                = 0,
-       .num_counters           = 2,
-       .cntval_bits            = 40,
-       .cntval_mask            = (1ULL << 40) - 1,
-       .get_event_constraints  = x86_get_event_constraints,
-       .event_constraints      = knc_event_constraints,
-       .format_attrs           = intel_knc_formats_attr,
-};
-
-__init int knc_pmu_init(void)
-{
-       x86_pmu = knc_pmu;
-
-       memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
-               sizeof(hw_cache_event_ids));
-
-       return 0;
-}
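
A note on the counter geometry the knc_pmu structure above declares: cntval_bits = 40 and cntval_mask = (1ULL << 40) - 1 describe a free-running 40-bit counter, and max_period is kept one bit short of that width so a counter programmed to the negative of the period keeps its top bit set until it actually overflows, which is what the overflow checks elsewhere in the x86 perf code rely on. Below is a minimal standalone sketch of the wraparound arithmetic the generic code applies to such a counter; the helper name and the test value are illustrative, not kernel symbols.

#include <stdint.h>
#include <stdio.h>

#define KNC_CNTVAL_BITS 40
#define KNC_CNTVAL_MASK ((1ULL << KNC_CNTVAL_BITS) - 1)

/*
 * Delta between two raw reads of a free-running 40-bit counter,
 * correct across a single wraparound.
 */
uint64_t knc_counter_delta(uint64_t prev_raw, uint64_t new_raw)
{
        return (new_raw - prev_raw) & KNC_CNTVAL_MASK;
}

int main(void)
{
        /* Counter wraps from its 40-bit maximum back to 1: two ticks elapsed. */
        printf("%llu\n",
               (unsigned long long)knc_counter_delta(KNC_CNTVAL_MASK, 1));
        return 0;
}

The kernel's x86_perf_event_update() reaches the same result with a shift pair (shift = 64 - cntval_bits) instead of an explicit mask; either way the accumulated perf count only ever sees the masked difference of two raw reads.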
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
deleted file mode 100644
index ec863b9..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-#include <linux/perf_event.h>
-
-enum perf_msr_id {
-       PERF_MSR_TSC                    = 0,
-       PERF_MSR_APERF                  = 1,
-       PERF_MSR_MPERF                  = 2,
-       PERF_MSR_PPERF                  = 3,
-       PERF_MSR_SMI                    = 4,
-
-       PERF_MSR_EVENT_MAX,
-};
-
-static bool test_aperfmperf(int idx)
-{
-       return boot_cpu_has(X86_FEATURE_APERFMPERF);
-}
-
-static bool test_intel(int idx)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-           boot_cpu_data.x86 != 6)
-               return false;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-       case 76: /* 14nm Atom "Airmont"                   */
-               if (idx == PERF_MSR_SMI)
-                       return true;
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
-                       return true;
-               break;
-       }
-
-       return false;
-}
-
-struct perf_msr {
-       u64     msr;
-       struct  perf_pmu_events_attr *attr;
-       bool    (*test)(int idx);
-};
-
-PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
-
-static struct perf_msr msr[] = {
-       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
-       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
-       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
-       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
-       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
-};
-
-static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
-       NULL,
-};
-
-static struct attribute_group events_attr_group = {
-       .name = "events",
-       .attrs = events_attrs,
-};
-
-PMU_FORMAT_ATTR(event, "config:0-63");
-static struct attribute *format_attrs[] = {
-       &format_attr_event.attr,
-       NULL,
-};
-static struct attribute_group format_attr_group = {
-       .name = "format",
-       .attrs = format_attrs,
-};
-
-static const struct attribute_group *attr_groups[] = {
-       &events_attr_group,
-       &format_attr_group,
-       NULL,
-};
-
-static int msr_event_init(struct perf_event *event)
-{
-       u64 cfg = event->attr.config;
-
-       if (event->attr.type != event->pmu->type)
-               return -ENOENT;
-
-       if (cfg >= PERF_MSR_EVENT_MAX)
-               return -EINVAL;
-
-       /* unsupported modes and filters */
-       if (event->attr.exclude_user   ||
-           event->attr.exclude_kernel ||
-           event->attr.exclude_hv     ||
-           event->attr.exclude_idle   ||
-           event->attr.exclude_host   ||
-           event->attr.exclude_guest  ||
-           event->attr.sample_period) /* no sampling */
-               return -EINVAL;
-
-       if (!msr[cfg].attr)
-               return -EINVAL;
-
-       event->hw.idx = -1;
-       event->hw.event_base = msr[cfg].msr;
-       event->hw.config = cfg;
-
-       return 0;
-}
-
-static inline u64 msr_read_counter(struct perf_event *event)
-{
-       u64 now;
-
-       if (event->hw.event_base)
-               rdmsrl(event->hw.event_base, now);
-       else
-               rdtscll(now);
-
-       return now;
-}
-static void msr_event_update(struct perf_event *event)
-{
-       u64 prev, now;
-       s64 delta;
-
-       /* Careful, an NMI might modify the previous event value. */
-again:
-       prev = local64_read(&event->hw.prev_count);
-       now = msr_read_counter(event);
-
-       if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
-               goto again;
-
-       delta = now - prev;
-       if (unlikely(event->hw.event_base == MSR_SMI_COUNT))
-               delta = sign_extend64(delta, 31);
-
-       local64_add(delta, &event->count);
-}
-
-static void msr_event_start(struct perf_event *event, int flags)
-{
-       u64 now;
-
-       now = msr_read_counter(event);
-       local64_set(&event->hw.prev_count, now);
-}
-
-static void msr_event_stop(struct perf_event *event, int flags)
-{
-       msr_event_update(event);
-}
-
-static void msr_event_del(struct perf_event *event, int flags)
-{
-       msr_event_stop(event, PERF_EF_UPDATE);
-}
-
-static int msr_event_add(struct perf_event *event, int flags)
-{
-       if (flags & PERF_EF_START)
-               msr_event_start(event, flags);
-
-       return 0;
-}
-
-static struct pmu pmu_msr = {
-       .task_ctx_nr    = perf_sw_context,
-       .attr_groups    = attr_groups,
-       .event_init     = msr_event_init,
-       .add            = msr_event_add,
-       .del            = msr_event_del,
-       .start          = msr_event_start,
-       .stop           = msr_event_stop,
-       .read           = msr_event_update,
-       .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
-};
-
-static int __init msr_init(void)
-{
-       int i, j = 0;
-
-       if (!boot_cpu_has(X86_FEATURE_TSC)) {
-               pr_cont("no MSR PMU driver.\n");
-               return 0;
-       }
-
-       /* Probe the MSRs. */
-       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
-               u64 val;
-
-               /*
-                * Virt sucks arse; you cannot tell if a R/O MSR is present :/
-                */
-               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-                       msr[i].attr = NULL;
-       }
-
-       /* List remaining MSRs in the sysfs attrs. */
-       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
-               if (msr[i].attr)
-                       events_attrs[j++] = &msr[i].attr->attr.attr;
-       }
-       events_attrs[j] = NULL;
-
-       perf_pmu_register(&pmu_msr, "msr", -1);
-
-       return 0;
-}
-device_initcall(msr_init);
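
The msr PMU above exposes TSC, APERF, MPERF, PPERF and SMI_COUNT as free-running, non-sampling counters. To see what the aperf/mperf pair measures independently of perf, the same architectural MSRs (IA32_MPERF = 0xe7, IA32_APERF = 0xe8) can be read through the msr driver's /dev/cpu/N/msr interface; the ratio of the deltas approximates the average effective frequency relative to the base frequency. A user-space sketch, assuming the msr module is loaded and the program is run as root, with error handling kept minimal:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_IA32_MPERF 0xe7
#define MSR_IA32_APERF 0xe8

/* /dev/cpu/N/msr: the MSR number is the file offset. */
static uint64_t rdmsr_fd(int fd, uint32_t msr)
{
        uint64_t val = 0;

        if (pread(fd, &val, sizeof(val), msr) != (ssize_t)sizeof(val))
                perror("pread");
        return val;
}

int main(void)
{
        uint64_t a0, m0, a1, m1;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        if (fd < 0) {
                perror("open /dev/cpu/0/msr");
                return 1;
        }

        a0 = rdmsr_fd(fd, MSR_IA32_APERF);
        m0 = rdmsr_fd(fd, MSR_IA32_MPERF);
        sleep(1);
        a1 = rdmsr_fd(fd, MSR_IA32_APERF);
        m1 = rdmsr_fd(fd, MSR_IA32_MPERF);
        close(fd);

        printf("effective/base frequency ratio: %.3f\n",
               (double)(a1 - a0) / (double)(m1 - m0));
        return 0;
}

Once this PMU is registered, roughly the same counters should also be visible through perf itself, e.g. perf stat -e msr/aperf/,msr/mperf/ -a sleep 1.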
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
deleted file mode 100644
index f2e5678..0000000
+++ /dev/null
@@ -1,1376 +0,0 @@
-/*
- * Netburst Performance Events (P4, old Xeon)
- *
- *  Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
- *  Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
- *
- *  For licencing details see kernel-base/COPYING
- */
-
-#include <linux/perf_event.h>
-
-#include <asm/perf_event_p4.h>
-#include <asm/hardirq.h>
-#include <asm/apic.h>
-
-#include "perf_event.h"
-
-#define P4_CNTR_LIMIT 3
-/*
- * array indices: 0,1 - HT threads, used with HT enabled cpu
- */
-struct p4_event_bind {
-       unsigned int opcode;                    /* Event code and ESCR selector */
-       unsigned int escr_msr[2];               /* ESCR MSR for this event */
-       unsigned int escr_emask;                /* valid ESCR EventMask bits */
-       unsigned int shared;                    /* event is shared across threads */
-       char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on absence */
-};
-
-struct p4_pebs_bind {
-       unsigned int metric_pebs;
-       unsigned int metric_vert;
-};
-
-/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
-#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
-       [P4_PEBS_METRIC__##name] = {                            \
-               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
-               .metric_vert = vert,                            \
-       }
-
-/*
- * note we have P4_PEBS_ENABLE_UOP_TAG always set here
- *
- * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
- * event configuration to find out which values are to be
- * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
- * registers
- */
-static struct p4_pebs_bind p4_pebs_bind_map[] = {
-       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
-       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
-       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
-       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
-       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
-       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
-       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
-       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
-       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
-};
-
-/*
- * Note that we don't use CCCR1 here; there is an
- * exception for P4_BSQ_ALLOCATION but we just have
- * no workaround.
- *
- * Consider this binding as the set of resources a particular
- * event may borrow; it doesn't contain the EventMask,
- * Tags and friends -- those are left to the caller.
- */
-static struct p4_event_bind p4_event_bind_map[] = {
-       [P4_EVENT_TC_DELIVER_MODE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
-               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
-               .shared         = 1,
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_BPU_FETCH_REQUEST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
-               .escr_msr       = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_ITLB_REFERENCE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
-               .escr_msr       = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_MEMORY_CANCEL] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
-               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_MEMORY_COMPLETE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_LOAD_PORT_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_STORE_PORT_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
-               .escr_msr       = { MSR_P4_SAAT_ESCR0 ,  MSR_P4_SAAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_MOB_LOAD_REPLAY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
-               .escr_msr       = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_PAGE_WALK_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
-               .escr_msr       = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
-               .shared         = 1,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BSQ_CACHE_REFERENCE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
-               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_IOQ_ALLOCATION] = {
-               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN)                 |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER)               |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_IOQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
-               .opcode         = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
-               .escr_msr       = { MSR_P4_FSB_ESCR1,  MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB)          |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
-               .cntr           = { {2, -1, -1}, {3, -1, -1} },
-       },
-       [P4_EVENT_FSB_DATA_ACTIVITY] = {
-               .opcode         = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
-               .shared         = 1,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BSQ_ALLOCATION] = {           /* shared ESCR, broken CCCR1 */
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
-               .escr_msr       = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE)      |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE)      |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
-               .cntr           = { {0, -1, -1}, {1, -1, -1} },
-       },
-       [P4_EVENT_BSQ_ACTIVE_ENTRIES] = {       /* shared ESCR */
-               .opcode         = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
-               .escr_msr       = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1)        |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE)     |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE)  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE)  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1)       |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
-               .cntr           = { {2, -1, -1}, {3, -1, -1} },
-       },
-       [P4_EVENT_SSE_INPUT_ASSIST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_PACKED_SP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_PACKED_DP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_SCALAR_SP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_SCALAR_DP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_64BIT_MMX_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_128BIT_MMX_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_X87_FP_UOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_X87_FP_UOP),
-               .escr_msr       = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_TC_MISC] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_MISC),
-               .escr_msr       = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_GLOBAL_POWER_EVENTS] = {
-               .opcode         = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_TC_MS_XFER] = {
-               .opcode         = P4_OPCODE(P4_EVENT_TC_MS_XFER),
-               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_UOP_QUEUE_WRITES] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
-               .escr_msr       = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD)     |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER)   |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
-               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RETIRED_BRANCH_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
-               .escr_msr       = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
-               .cntr           = { {4, 5, -1}, {6, 7, -1} },
-       },
-       [P4_EVENT_RESOURCE_STALL] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
-               .escr_msr       = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_WC_BUFFER] = {
-               .opcode         = P4_OPCODE(P4_EVENT_WC_BUFFER),
-               .escr_msr       = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS)               |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
-               .shared         = 1,
-               .cntr           = { {8, 9, -1}, {10, 11, -1} },
-       },
-       [P4_EVENT_B2B_CYCLES] = {
-               .opcode         = P4_OPCODE(P4_EVENT_B2B_CYCLES),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_BNR] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BNR),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_SNOOP] = {
-               .opcode         = P4_OPCODE(P4_EVENT_SNOOP),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_RESPONSE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_RESPONSE),
-               .escr_msr       = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
-               .escr_emask     = 0,
-               .cntr           = { {0, -1, -1}, {2, -1, -1} },
-       },
-       [P4_EVENT_FRONT_END_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_EXECUTION_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_REPLAY_EVENT] = {
-               .opcode         = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_INSTR_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)            |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_UOPS_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_UOP_TYPE] = {
-               .opcode         = P4_OPCODE(P4_EVENT_UOP_TYPE),
-               .escr_msr       = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS)                  |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_BRANCH_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_X87_ASSIST] = {
-               .opcode         = P4_OPCODE(P4_EVENT_X87_ASSIST),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU)                    |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_MACHINE_CLEAR] = {
-               .opcode         = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
-               .escr_msr       = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR)                |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR)              |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-       [P4_EVENT_INSTR_COMPLETED] = {
-               .opcode         = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
-               .escr_msr       = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
-               .escr_emask     =
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS)             |
-                       P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
-               .cntr           = { {12, 13, 16}, {14, 15, 17} },
-       },
-};
-
-#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
-       p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
-                           P4_ESCR_EMASK_BIT(event, bit))              | \
-       p4_config_pack_cccr(metric                                      | \
-                           P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
-
-static __initconst const u64 p4_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
-       },
-},
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0,
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-                                               P4_PEBS_METRIC__none),
-               [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-                                               P4_PEBS_METRIC__none),
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(NODE) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-/*
- * Because Netburst is quite restricted in how many identical
- * events may run simultaneously, we introduce event aliases,
- * i.e. different events which have the same functionality but
- * use non-intersecting resources (ESCR/CCCR/counter registers).
- *
- * This allows us to relax the restrictions a bit and run two or
- * more identical events together.
- *
- * Never set any custom internal bits such as P4_CONFIG_HT,
- * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC; they are
- * either kept up to date automatically or not applicable at all.
- */
-struct p4_event_alias {
-       u64 original;
-       u64 alternative;
-} p4_event_aliases[] = {
-       {
-               /*
-                * Non-halted cycles can be substituted with non-sleeping cycles (see
-                * Intel SDM Vol3b for details). We need this alias to be able
-                * to run nmi-watchdog and 'perf top' (or any other user space tool
-                * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
-                * simultaneously.
-                */
-       .original       =
-               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
-       .alternative    =
-               p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT)             |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
-                                   P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
-               p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT          |
-                                   P4_CCCR_COMPARE),
-       },
-};
-
-static u64 p4_get_alias_event(u64 config)
-{
-       u64 config_match;
-       int i;
-
-       /*
-        * Only an event with the special mark is allowed; we
-        * need to be sure it didn't come in as a malformed
-        * RAW event.
-        */
-       if (!(config & P4_CONFIG_ALIASABLE))
-               return 0;
-
-       config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
-
-       for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
-               if (config_match == p4_event_aliases[i].original) {
-                       config_match = p4_event_aliases[i].alternative;
-                       break;
-               } else if (config_match == p4_event_aliases[i].alternative) {
-                       config_match = p4_event_aliases[i].original;
-                       break;
-               }
-       }
-
-       if (i >= ARRAY_SIZE(p4_event_aliases))
-               return 0;
-
-       return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
-}
-
-static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
-  /* non-halted CPU clocks */
-  [PERF_COUNT_HW_CPU_CYCLES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING))       |
-               P4_CONFIG_ALIASABLE,
-
-  /*
-   * retired instructions
-   * for the sake of simplicity we don't use the FSB tagging
-   */
-  [PERF_COUNT_HW_INSTRUCTIONS] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)               |
-               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
-
-  /* cache hits */
-  [PERF_COUNT_HW_CACHE_REFERENCES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
-
-  /* cache misses */
-  [PERF_COUNT_HW_CACHE_MISSES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)   |
-               P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
-
-  /* branch instructions retired */
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)    |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
-
-  /* mispredicted branches retired */
-  [PERF_COUNT_HW_BRANCH_MISSES]        =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)      |
-               P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
-
-  /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN):  */
-  [PERF_COUNT_HW_BUS_CYCLES] =
-       p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)           |
-               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)         |
-               P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))        |
-       p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
-};
-
-static struct p4_event_bind *p4_config_get_bind(u64 config)
-{
-       unsigned int evnt = p4_config_unpack_event(config);
-       struct p4_event_bind *bind = NULL;
-
-       if (evnt < ARRAY_SIZE(p4_event_bind_map))
-               bind = &p4_event_bind_map[evnt];
-
-       return bind;
-}
-
-static u64 p4_pmu_event_map(int hw_event)
-{
-       struct p4_event_bind *bind;
-       unsigned int esel;
-       u64 config;
-
-       config = p4_general_events[hw_event];
-       bind = p4_config_get_bind(config);
-       esel = P4_OPCODE_ESEL(bind->opcode);
-       config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
-
-       return config;
-}
-
-/* check cpu model specifics */
-static bool p4_event_match_cpu_model(unsigned int event_idx)
-{
-       /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */
-       if (event_idx == P4_EVENT_INSTR_COMPLETED) {
-               if (boot_cpu_data.x86_model != 3 &&
-                       boot_cpu_data.x86_model != 4 &&
-                       boot_cpu_data.x86_model != 6)
-                       return false;
-       }
-
-       /*
-        * For info
-        * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2
-        */
-
-       return true;
-}
-
-static int p4_validate_raw_event(struct perf_event *event)
-{
-       unsigned int v, emask;
-
-       /* User data may have an out-of-bounds event index */
-       v = p4_config_unpack_event(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_event_bind_map))
-               return -EINVAL;
-
-       /* It may be unsupported: */
-       if (!p4_event_match_cpu_model(v))
-               return -EINVAL;
-
-       /*
-        * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
-        * in Architectural Performance Monitoring; it selects not
-        * _which_ logical cpu to count on but rather _when_, i.e. it
-        * depends on the logical cpu state -- count the event if one
-        * cpu is active, none, both or any -- so we just allow the
-        * user to pass any desired value.
-        *
-        * In turn we always set the Tx_OS/Tx_USR bits bound to the
-        * local logical cpu, without propagating them to the other cpu.
-        */
-
-       /*
-        * if an event is shared across the logical threads
-        * the user needs special permissions to be able to use it
-        */
-       if (p4_ht_active() && p4_event_bind_map[v].shared) {
-               if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-                       return -EACCES;
-       }
-
-       /* ESCR EventMask bits may be invalid */
-       emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
-       if (emask & ~p4_event_bind_map[v].escr_emask)
-               return -EINVAL;
-
-       /*
-        * it may have some invalid PEBS bits
-        */
-       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
-               return -EINVAL;
-
-       v = p4_config_unpack_metric(event->attr.config);
-       if (v >= ARRAY_SIZE(p4_pebs_bind_map))
-               return -EINVAL;
-
-       return 0;
-}
-
-static int p4_hw_config(struct perf_event *event)
-{
-       int cpu = get_cpu();
-       int rc = 0;
-       u32 escr, cccr;
-
-       /*
-        * The reason we grab the cpu this early is that if we get
-        * scheduled for the first time on the same cpu, we will not
-        * need to swap the thread-specific flags in the config (and
-        * will save some cpu cycles).
-        */
-
-       cccr = p4_default_cccr_conf(cpu);
-       escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
-                                        event->attr.exclude_user);
-       event->hw.config = p4_config_pack_escr(escr) |
-                          p4_config_pack_cccr(cccr);
-
-       if (p4_ht_active() && p4_ht_thread(cpu))
-               event->hw.config = p4_set_ht_bit(event->hw.config);
-
-       if (event->attr.type == PERF_TYPE_RAW) {
-               struct p4_event_bind *bind;
-               unsigned int esel;
-               /*
-                * Clear the bits we reserve for the kernel itself to
-                * manage and which are never allowed from user space.
-                */
-               event->attr.config &= P4_CONFIG_MASK;
-
-               rc = p4_validate_raw_event(event);
-               if (rc)
-                       goto out;
-
-               /*
-                * Note that for RAW events we allow the user to use the
-                * P4_CCCR_RESERVED bits since we keep additional info there
-                * (for cache events etc.)
-                */
-               event->hw.config |= event->attr.config;
-               bind = p4_config_get_bind(event->attr.config);
-               if (!bind) {
-                       rc = -EINVAL;
-                       goto out;
-               }
-               esel = P4_OPCODE_ESEL(bind->opcode);
-               event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
-       }
-
-       rc = x86_setup_perfctr(event);
-out:
-       put_cpu();
-       return rc;
-}
-
-static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
-{
-       u64 v;
-
-       /* an official way for overflow indication */
-       rdmsrl(hwc->config_base, v);
-       if (v & P4_CCCR_OVF) {
-               wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
-               return 1;
-       }
-
-       /*
-        * In some circumstances the overflow might issue an NMI without
-        * setting the P4_CCCR_OVF bit. Because the counter holds a negative
-        * value we simply check whether the high bit is set; if it's cleared,
-        * the counter has crossed zero and continued counting before the
-        * real NMI signal was received:
-        */
-       rdmsrl(hwc->event_base, v);
-       if (!(v & ARCH_P4_UNFLAGGED_BIT))
-               return 1;
-
-       return 0;
-}
-
-static void p4_pmu_disable_pebs(void)
-{
-       /*
-        * FIXME
-        *
-        * Two threads are still allowed to set up the same cache
-        * events, so we can't simply clear the metrics until we know
-        * nobody depends on us; that would need some kind of reference
-        * count for "ReplayEvent" users.
-        *
-        * RAW events are even more complex: if the user (for some
-        * reason) passes a cache event metric with an improper event
-        * opcode, it's fine from the hardware's point of view but
-        * complete nonsense as far as the meaning goes.
-        *
-        * So for the moment leave the metrics turned on forever -- it's
-        * OK for now but needs to be revisited!
-        *
-        * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
-        * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
-        */
-}
-
-static inline void p4_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-
-       /*
-        * If the event gets disabled while the counter is in the
-        * overflowed state we need to clear P4_CCCR_OVF, otherwise the
-        * interrupt gets asserted again and again.
-        */
-       (void)wrmsrl_safe(hwc->config_base,
-               p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
-}
-
-static void p4_pmu_disable_all(void)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct perf_event *event = cpuc->events[idx];
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               p4_pmu_disable_event(event);
-       }
-
-       p4_pmu_disable_pebs();
-}
-
-/* configuration must be valid */
-static void p4_pmu_enable_pebs(u64 config)
-{
-       struct p4_pebs_bind *bind;
-       unsigned int idx;
-
-       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
-
-       idx = p4_config_unpack_metric(config);
-       if (idx == P4_PEBS_METRIC__none)
-               return;
-
-       bind = &p4_pebs_bind_map[idx];
-
-       (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
-       (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,      (u64)bind->metric_vert);
-}
-
-static void p4_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       int thread = p4_ht_config_thread(hwc->config);
-       u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
-       unsigned int idx = p4_config_unpack_event(hwc->config);
-       struct p4_event_bind *bind;
-       u64 escr_addr, cccr;
-
-       bind = &p4_event_bind_map[idx];
-       escr_addr = bind->escr_msr[thread];
-
-       /*
-        * - we don't support cascaded counters yet
-        * - and counter 1 is broken (erratum)
-        */
-       WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
-       WARN_ON_ONCE(hwc->idx == 1);
-
-       /* we need a real Event value */
-       escr_conf &= ~P4_ESCR_EVENT_MASK;
-       escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
-
-       cccr = p4_config_unpack_cccr(hwc->config);
-
-       /*
-        * it could be a cache event, so we need to write the metrics
-        * into additional MSRs
-        */
-       p4_pmu_enable_pebs(hwc->config);
-
-       (void)wrmsrl_safe(escr_addr, escr_conf);
-       (void)wrmsrl_safe(hwc->config_base,
-                               (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
-}
-
-static void p4_pmu_enable_all(int added)
-{
-       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int idx;
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               struct perf_event *event = cpuc->events[idx];
-               if (!test_bit(idx, cpuc->active_mask))
-                       continue;
-               p4_pmu_enable_event(event);
-       }
-}
-
-static int p4_pmu_handle_irq(struct pt_regs *regs)
-{
-       struct perf_sample_data data;
-       struct cpu_hw_events *cpuc;
-       struct perf_event *event;
-       struct hw_perf_event *hwc;
-       int idx, handled = 0;
-       u64 val;
-
-       cpuc = this_cpu_ptr(&cpu_hw_events);
-
-       for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               int overflow;
-
-               if (!test_bit(idx, cpuc->active_mask)) {
-                       /* catch in-flight IRQs */
-                       if (__test_and_clear_bit(idx, cpuc->running))
-                               handled++;
-                       continue;
-               }
-
-               event = cpuc->events[idx];
-               hwc = &event->hw;
-
-               WARN_ON_ONCE(hwc->idx != idx);
-
-               /* it might be unflagged overflow */
-               overflow = p4_pmu_clear_cccr_ovf(hwc);
-
-               val = x86_perf_event_update(event);
-               if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
-                       continue;
-
-               handled += overflow;
-
-               /* event overflow for sure */
-               perf_sample_data_init(&data, 0, hwc->last_period);
-
-               if (!x86_perf_event_set_period(event))
-                       continue;
-
-
-               if (perf_event_overflow(event, &data, regs))
-                       x86_pmu_stop(event, 0);
-       }
-
-       if (handled)
-               inc_irq_stat(apic_perf_irqs);
-
-       /*
-        * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
-        * been observed that the OVF bit flag has to be cleared first _before_
-        * the LVTPC can be unmasked.
-        *
-        * The reason is the NMI line will continue to be asserted while the OVF
-        * bit is set.  This causes a second NMI to be generated if the LVTPC is
-        * unmasked before the OVF bit is cleared, leading to unknown NMI
-        * messages.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       return handled;
-}
-
-/*
- * swap thread specific fields according to a thread
- * we are going to run on
- */
-static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
-{
-       u32 escr, cccr;
-
-       /*
-        * either we are lucky and continue on the same cpu, or there is no HT support
-        */
-       if (!p4_should_swap_ts(hwc->config, cpu))
-               return;
-
-       /*
-        * the event has been migrated from another logical
-        * cpu, so we need to swap the thread-specific flags
-        */
-
-       escr = p4_config_unpack_escr(hwc->config);
-       cccr = p4_config_unpack_cccr(hwc->config);
-
-       if (p4_ht_thread(cpu)) {
-               cccr &= ~P4_CCCR_OVF_PMI_T0;
-               cccr |= P4_CCCR_OVF_PMI_T1;
-               if (escr & P4_ESCR_T0_OS) {
-                       escr &= ~P4_ESCR_T0_OS;
-                       escr |= P4_ESCR_T1_OS;
-               }
-               if (escr & P4_ESCR_T0_USR) {
-                       escr &= ~P4_ESCR_T0_USR;
-                       escr |= P4_ESCR_T1_USR;
-               }
-               hwc->config  = p4_config_pack_escr(escr);
-               hwc->config |= p4_config_pack_cccr(cccr);
-               hwc->config |= P4_CONFIG_HT;
-       } else {
-               cccr &= ~P4_CCCR_OVF_PMI_T1;
-               cccr |= P4_CCCR_OVF_PMI_T0;
-               if (escr & P4_ESCR_T1_OS) {
-                       escr &= ~P4_ESCR_T1_OS;
-                       escr |= P4_ESCR_T0_OS;
-               }
-               if (escr & P4_ESCR_T1_USR) {
-                       escr &= ~P4_ESCR_T1_USR;
-                       escr |= P4_ESCR_T0_USR;
-               }
-               hwc->config  = p4_config_pack_escr(escr);
-               hwc->config |= p4_config_pack_cccr(cccr);
-               hwc->config &= ~P4_CONFIG_HT;
-       }
-}
-
-/*
- * ESCR address hashing is tricky: the ESCRs are not sequential
- * in memory, but they all start at MSR_P4_BSU_ESCR0 (0x03a0) and
- * every ESCR address falls within the range [0x3a0, 0x3e1],
- *
- * so we end up with a hashtable that is ~70% filled
- */
-
-#define P4_ESCR_MSR_BASE               0x000003a0
-#define P4_ESCR_MSR_MAX                        0x000003e1
-#define P4_ESCR_MSR_TABLE_SIZE         (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
-#define P4_ESCR_MSR_IDX(msr)           (msr - P4_ESCR_MSR_BASE)
-#define P4_ESCR_MSR_TABLE_ENTRY(msr)   [P4_ESCR_MSR_IDX(msr)] = msr
-
-static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
-       P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
-};
-
-static int p4_get_escr_idx(unsigned int addr)
-{
-       unsigned int idx = P4_ESCR_MSR_IDX(addr);
-
-       if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE      ||
-                       !p4_escr_table[idx]             ||
-                       p4_escr_table[idx] != addr)) {
-               WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
-               return -1;
-       }
-
-       return idx;
-}
-
-static int p4_next_cntr(int thread, unsigned long *used_mask,
-                       struct p4_event_bind *bind)
-{
-       int i, j;
-
-       for (i = 0; i < P4_CNTR_LIMIT; i++) {
-               j = bind->cntr[thread][i];
-               if (j != -1 && !test_bit(j, used_mask))
-                       return j;
-       }
-
-       return -1;
-}
-
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
-{
-       unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
-       int cpu = smp_processor_id();
-       struct hw_perf_event *hwc;
-       struct p4_event_bind *bind;
-       unsigned int i, thread, num;
-       int cntr_idx, escr_idx;
-       u64 config_alias;
-       int pass;
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-       bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
-
-       for (i = 0, num = n; i < n; i++, num--) {
-
-               hwc = &cpuc->event_list[i]->hw;
-               thread = p4_ht_thread(cpu);
-               pass = 0;
-
-again:
-               /*
-                * It's possible to hit a circular lock
-                * between original and alternative events
-                * if both are scheduled already.
-                */
-               if (pass > 2)
-                       goto done;
-
-               bind = p4_config_get_bind(hwc->config);
-               escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
-               if (unlikely(escr_idx == -1))
-                       goto done;
-
-               if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
-                       cntr_idx = hwc->idx;
-                       if (assign)
-                               assign[i] = hwc->idx;
-                       goto reserve;
-               }
-
-               cntr_idx = p4_next_cntr(thread, used_mask, bind);
-               if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
-                       /*
-                        * Check whether an event alias is still available.
-                        */
-                       config_alias = p4_get_alias_event(hwc->config);
-                       if (!config_alias)
-                               goto done;
-                       hwc->config = config_alias;
-                       pass++;
-                       goto again;
-               }
-               /*
-                * Perf does test runs to see if a whole group can be assigned
-                * together successfully.  There can be multiple rounds of this.
-                * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
-                * bits, such that the next round of group assignments will
-                * cause the above p4_should_swap_ts to pass instead of fail.
-                * This leads to counters exclusive to thread0 being used by
-                * thread1.
-                *
-                * Solve this with a cheap hack, reset the idx back to -1 to
-                * force a new lookup (p4_next_cntr) to get the right counter
-                * for the right thread.
-                *
-                * This probably doesn't comply with the general spirit of how
-                * perf wants to work, but P4 is special. :-(
-                */
-               if (p4_should_swap_ts(hwc->config, cpu))
-                       hwc->idx = -1;
-               p4_pmu_swap_config_ts(hwc, cpu);
-               if (assign)
-                       assign[i] = cntr_idx;
-reserve:
-               set_bit(cntr_idx, used_mask);
-               set_bit(escr_idx, escr_mask);
-       }
-
-done:
-       return num ? -EINVAL : 0;
-}
-
-PMU_FORMAT_ATTR(cccr, "config:0-31" );
-PMU_FORMAT_ATTR(escr, "config:32-62");
-PMU_FORMAT_ATTR(ht,   "config:63"   );
-
-static struct attribute *intel_p4_formats_attr[] = {
-       &format_attr_cccr.attr,
-       &format_attr_escr.attr,
-       &format_attr_ht.attr,
-       NULL,
-};
-
-static __initconst const struct x86_pmu p4_pmu = {
-       .name                   = "Netburst P4/Xeon",
-       .handle_irq             = p4_pmu_handle_irq,
-       .disable_all            = p4_pmu_disable_all,
-       .enable_all             = p4_pmu_enable_all,
-       .enable                 = p4_pmu_enable_event,
-       .disable                = p4_pmu_disable_event,
-       .eventsel               = MSR_P4_BPU_CCCR0,
-       .perfctr                = MSR_P4_BPU_PERFCTR0,
-       .event_map              = p4_pmu_event_map,
-       .max_events             = ARRAY_SIZE(p4_general_events),
-       .get_event_constraints  = x86_get_event_constraints,
-       /*
-        * If HT is disabled we may need to use all
-        * ARCH_P4_MAX_CCCR counters simultaneously,
-        * though leave it restricted for the moment,
-        * assuming HT is on
-        */
-       .num_counters           = ARCH_P4_MAX_CCCR,
-       .apic                   = 1,
-       .cntval_bits            = ARCH_P4_CNTRVAL_BITS,
-       .cntval_mask            = ARCH_P4_CNTRVAL_MASK,
-       .max_period             = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
-       .hw_config              = p4_hw_config,
-       .schedule_events        = p4_pmu_schedule_events,
-       /*
-        * This handles erratum N15 in intel doc 249199-029,
-        * the counter may not be updated correctly on write
-        * so we need a second write operation to do the trick
-        * (the official workaround didn't work)
-        *
-        * the former idea is taken from OProfile code
-        */
-       .perfctr_second_write   = 1,
-
-       .format_attrs           = intel_p4_formats_attr,
-};
-
-__init int p4_pmu_init(void)
-{
-       unsigned int low, high;
-       int i, reg;
-
-       /* If we get stripped -- indexing fails */
-       BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
-
-       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
-       if (!(low & (1 << 7))) {
-               pr_cont("unsupported Netburst CPU model %d ",
-                       boot_cpu_data.x86_model);
-               return -ENODEV;
-       }
-
-       memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
-               sizeof(hw_cache_event_ids));
-
-       pr_cont("Netburst events, ");
-
-       x86_pmu = p4_pmu;
-
-       /*
-        * Even though the counters are configured to interrupt a particular
-        * logical processor when an overflow happens, testing has shown that
-        * on kdump kernels (which use a single cpu), thread1's counter
-        * continues to run and will report an NMI on thread0.  Due to the
-        * overflow bug, this leads to a stream of unknown NMIs.
-        *
-        * Solve this by zero'ing out the registers to mimic a reset.
-        */
-       for (i = 0; i < x86_pmu.num_counters; i++) {
-               reg = x86_pmu_config_addr(i);
-               wrmsrl_safe(reg, 0ULL);
-       }
-
-       return 0;
-}
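
The format attributes removed with this file (cccr = config:0-31, escr = config:32-62, ht = config:63) describe how user space packs a raw Netburst event into perf_event_attr.config. As a hedged illustration only -- the CCCR/ESCR values below are placeholders, not real event encodings, and the HT bit (config:63) is left clear -- a minimal user-space sketch of that packing could look like this:

	#include <linux/perf_event.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <unistd.h>

	static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
				    int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t cccr = 0x0003b000;	/* placeholder CCCR bits (config:0-31)  */
		uint64_t escr = 0x0000c01b;	/* placeholder ESCR bits (config:32-62) */
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = (escr << 32) | cccr;	/* pack per the format attrs */
		attr.disabled = 1;

		fd = perf_event_open(&attr, 0, -1, -1, 0);
		if (fd < 0)
			perror("perf_event_open");
		else
			close(fd);
		return 0;
	}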
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
deleted file mode 100644 (file)
index 7c1a0c0..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-#include <linux/perf_event.h>
-#include <linux/types.h>
-
-#include "perf_event.h"
-
-/*
- * Not sure about some of these
- */
-static const u64 p6_perfmon_event_map[] =
-{
-  [PERF_COUNT_HW_CPU_CYCLES]           = 0x0079,       /* CPU_CLK_UNHALTED */
-  [PERF_COUNT_HW_INSTRUCTIONS]         = 0x00c0,       /* INST_RETIRED     */
-  [PERF_COUNT_HW_CACHE_REFERENCES]     = 0x0f2e,       /* L2_RQSTS:M:E:S:I */
-  [PERF_COUNT_HW_CACHE_MISSES]         = 0x012e,       /* L2_RQSTS:I       */
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]  = 0x00c4,       /* BR_INST_RETIRED  */
-  [PERF_COUNT_HW_BRANCH_MISSES]                = 0x00c5,       /* BR_MISS_PRED_RETIRED */
-  [PERF_COUNT_HW_BUS_CYCLES]           = 0x0062,       /* BUS_DRDY_CLOCKS  */
-  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2,    /* RESOURCE_STALLS  */
-
-};
-
-static const u64 __initconst p6_hw_cache_event_ids
-                               [PERF_COUNT_HW_CACHE_MAX]
-                               [PERF_COUNT_HW_CACHE_OP_MAX]
-                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS       */
-                [ C(RESULT_MISS)   ] = 0x0045, /* DCU_LINES_IN        */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0f29,  /* L2_LD:M:E:S:I       */
-       },
-        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-        },
- },
- [ C(L1I ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
-               [ C(RESULT_MISS)   ] = 0x0f28,  /* L2_IFETCH:M:E:S:I  */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(LL  ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0x0025,  /* L2_M_LINES_INM     */
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(DTLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0043,  /* DATA_MEM_REFS      */
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0,
-               [ C(RESULT_MISS)   ] = 0,
-       },
- },
- [ C(ITLB) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0080,  /* IFU_IFETCH         */
-               [ C(RESULT_MISS)   ] = 0x0085,  /* ITLB_MISS          */
-       },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
- [ C(BPU ) ] = {
-       [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x00c4,  /* BR_INST_RETIRED      */
-               [ C(RESULT_MISS)   ] = 0x00c5,  /* BR_MISS_PRED_RETIRED */
-        },
-       [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
-       [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = -1,
-               [ C(RESULT_MISS)   ] = -1,
-       },
- },
-};
-
-static u64 p6_pmu_event_map(int hw_event)
-{
-       return p6_perfmon_event_map[hw_event];
-}
-
-/*
- * Event setting that is specified not to count anything.
- * We use this to effectively disable a counter.
- *
- * L2_RQSTS with 0 MESI unit mask.
- */
-#define P6_NOP_EVENT                   0x0000002EULL
-
-static struct event_constraint p6_event_constraints[] =
-{
-       INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
-       INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
-       INTEL_EVENT_CONSTRAINT(0x11, 0x2),      /* FP_ASSIST */
-       INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
-       INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
-       INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
-       EVENT_CONSTRAINT_END
-};
-
-static void p6_pmu_disable_all(void)
-{
-       u64 val;
-
-       /* p6 only has one enable register */
-       rdmsrl(MSR_P6_EVNTSEL0, val);
-       val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static void p6_pmu_enable_all(int added)
-{
-       unsigned long val;
-
-       /* p6 only has one enable register */
-       rdmsrl(MSR_P6_EVNTSEL0, val);
-       val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsrl(MSR_P6_EVNTSEL0, val);
-}
-
-static inline void
-p6_pmu_disable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val = P6_NOP_EVENT;
-
-       (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-static void p6_pmu_enable_event(struct perf_event *event)
-{
-       struct hw_perf_event *hwc = &event->hw;
-       u64 val;
-
-       val = hwc->config;
-
-       /*
-        * p6 only has a global event enable, set on PerfEvtSel0
-        * We "disable" events by programming P6_NOP_EVENT
-        * and we rely on p6_pmu_enable_all() being called
-        * to actually enable the events.
-        */
-
-       (void)wrmsrl_safe(hwc->config_base, val);
-}
-
-PMU_FORMAT_ATTR(event, "config:0-7"    );
-PMU_FORMAT_ATTR(umask, "config:8-15"   );
-PMU_FORMAT_ATTR(edge,  "config:18"     );
-PMU_FORMAT_ATTR(pc,    "config:19"     );
-PMU_FORMAT_ATTR(inv,   "config:23"     );
-PMU_FORMAT_ATTR(cmask, "config:24-31"  );
-
-static struct attribute *intel_p6_formats_attr[] = {
-       &format_attr_event.attr,
-       &format_attr_umask.attr,
-       &format_attr_edge.attr,
-       &format_attr_pc.attr,
-       &format_attr_inv.attr,
-       &format_attr_cmask.attr,
-       NULL,
-};
-
-static __initconst const struct x86_pmu p6_pmu = {
-       .name                   = "p6",
-       .handle_irq             = x86_pmu_handle_irq,
-       .disable_all            = p6_pmu_disable_all,
-       .enable_all             = p6_pmu_enable_all,
-       .enable                 = p6_pmu_enable_event,
-       .disable                = p6_pmu_disable_event,
-       .hw_config              = x86_pmu_hw_config,
-       .schedule_events        = x86_schedule_events,
-       .eventsel               = MSR_P6_EVNTSEL0,
-       .perfctr                = MSR_P6_PERFCTR0,
-       .event_map              = p6_pmu_event_map,
-       .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
-       .apic                   = 1,
-       .max_period             = (1ULL << 31) - 1,
-       .version                = 0,
-       .num_counters           = 2,
-       /*
-        * Events have 40 bits implemented. However they are designed such
-        * that bits [32-39] are sign extensions of bit 31. As such the
-        * effective width of an event for a P6-like PMU is 32 bits only.
-        *
-        * See IA-32 Intel Architecture Software developer manual Vol 3B
-        */
-       .cntval_bits            = 32,
-       .cntval_mask            = (1ULL << 32) - 1,
-       .get_event_constraints  = x86_get_event_constraints,
-       .event_constraints      = p6_event_constraints,
-
-       .format_attrs           = intel_p6_formats_attr,
-       .events_sysfs_show      = intel_event_sysfs_show,
-
-};
-
-static __init void p6_pmu_rdpmc_quirk(void)
-{
-       if (boot_cpu_data.x86_mask < 9) {
-               /*
-                * PPro erratum 26; fixed in stepping 9 and above.
-                */
-               pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
-               x86_pmu.attr_rdpmc_broken = 1;
-               x86_pmu.attr_rdpmc = 0;
-       }
-}
-
-__init int p6_pmu_init(void)
-{
-       x86_pmu = p6_pmu;
-
-       switch (boot_cpu_data.x86_model) {
-       case  1: /* Pentium Pro */
-               x86_add_quirk(p6_pmu_rdpmc_quirk);
-               break;
-
-       case  3: /* Pentium II - Klamath */
-       case  5: /* Pentium II - Deschutes */
-       case  6: /* Pentium II - Mendocino */
-               break;
-
-       case  7: /* Pentium III - Katmai */
-       case  8: /* Pentium III - Coppermine */
-       case 10: /* Pentium III Xeon */
-       case 11: /* Pentium III - Tualatin */
-               break;
-
-       case  9: /* Pentium M - Banias */
-       case 13: /* Pentium M - Dothan */
-               break;
-
-       default:
-               pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
-               return -ENODEV;
-       }
-
-       memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
-               sizeof(hw_cache_event_ids));
-
-       return 0;
-}
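
The comment in the removed p6_pmu definition explains why .cntval_bits is 32 and .max_period is 2^31-1 even though the counters implement 40 bits: bits 32-39 only sign-extend bit 31. Roughly the preload arithmetic the generic x86 perf code applies, shown as a stand-alone sketch (not part of this patch):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const unsigned int cntval_bits = 32;
		const uint64_t cntval_mask = (1ULL << cntval_bits) - 1;
		const uint64_t period = (1ULL << 31) - 1;	/* .max_period */

		/* the counter is preloaded with -period, truncated to 32 bits */
		uint64_t preload = (uint64_t)(-(int64_t)period) & cntval_mask;

		printf("preload 0x%08llx overflows after %llu events\n",
		       (unsigned long long)preload,
		       (unsigned long long)(cntval_mask + 1 - preload));
		return 0;
	}

With the period capped below 2^31 the truncated preload stays "negative", so the counter overflows after exactly 'period' increments.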
index 819d94982e078b8597f084f8409fcd3106361293..f6f50c4ceaeceef16170d14d6d55376823bbd63f 100644 (file)
@@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
        for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
                if (!rdrand_long(&tmp)) {
                        clear_cpu_cap(c, X86_FEATURE_RDRAND);
-                       printk_once(KERN_WARNING "rdrand: disabled\n");
+                       pr_warn_once("rdrand: disabled\n");
                        return;
                }
        }
index 4c60eaf0571c2fb1a6d73258861398fc6bcc963e..cd531355e8386b4177d4dcecaf1031ba8c2086c8 100644 (file)
@@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
        c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
        if (!printed) {
-               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
+               pr_info("CPU: Physical Processor ID: %d\n",
                       c->phys_proc_id);
                if (c->x86_max_cores > 1)
-                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
+                       pr_info("CPU: Processor Core ID: %d\n",
                               c->cpu_core_id);
                printed = 1;
        }
index 252da7aceca67ff580e8189ed267b2ce44d83373..e3b4d18411751c50b02967d372d21687d8854307 100644 (file)
@@ -33,7 +33,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
        if (max >= 0x80860001) {
                cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
                if (cpu_rev != 0x02000000) {
-                       printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
+                       pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
                                (cpu_rev >> 24) & 0xff,
                                (cpu_rev >> 16) & 0xff,
                                (cpu_rev >> 8) & 0xff,
@@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c)
        if (max >= 0x80860002) {
                cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
                if (cpu_rev == 0x02000000) {
-                       printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
+                       pr_info("CPU: Processor revision %08X, %u MHz\n",
                                new_cpu_rev, cpu_freq);
                }
-               printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
+               pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n",
                       (cms_rev1 >> 24) & 0xff,
                       (cms_rev1 >> 16) & 0xff,
                       (cms_rev1 >> 8) & 0xff,
@@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c)
                      (void *)&cpu_info[56],
                      (void *)&cpu_info[60]);
                cpu_info[64] = '\0';
-               printk(KERN_INFO "CPU: %s\n", cpu_info);
+               pr_info("CPU: %s\n", cpu_info);
        }
 
        /* Unhide possibly hidden capability flags */
index 628a059a9a0663ef7cbdf07e4750c9e1e4f53006..364e5834689753fc7da34c6dc8a34cca22ab92db 100644 (file)
@@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void)
        tsc_hz = eax | (((uint64_t)ebx) << 32);
        do_div(tsc_hz, 1000);
        BUG_ON(tsc_hz >> 32);
-       printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+       pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
                         (unsigned long) tsc_hz / 1000,
                         (unsigned long) tsc_hz % 1000);
 
@@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void)
        if (ebx != UINT_MAX)
                x86_platform.calibrate_tsc = vmware_get_tsc_khz;
        else
-               printk(KERN_WARNING
-                      "Failed to get TSC freq from the hypervisor\n");
+               pr_warn("Failed to get TSC freq from the hypervisor\n");
 }
 
 /*
index 58f34319b29ab64aee4bd21e6ff30367d478c262..9ef978d69c22e00cffdce3592cbe5a495027a058 100644 (file)
@@ -57,10 +57,9 @@ struct crash_elf_data {
        struct kimage *image;
        /*
         * Total number of ram ranges we have after various adjustments for
-        * GART, crash reserved region etc.
+        * crash reserved region, etc.
         */
        unsigned int max_nr_ranges;
-       unsigned long gart_start, gart_end;
 
        /* Pointer to elf header */
        void *ehdr;
@@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
        return 0;
 }
 
-static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
-{
-       struct crash_elf_data *ced = arg;
-
-       ced->gart_start = start;
-       ced->gart_end = end;
-
-       /* Not expecting more than 1 gart aperture */
-       return 1;
-}
-
 
 /* Gather all the required information to prepare elf headers for ram regions */
 static void fill_up_crash_elf_data(struct crash_elf_data *ced,
@@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
 
        ced->max_nr_ranges = nr_ranges;
 
-       /*
-        * We don't create ELF headers for GART aperture as an attempt
-        * to dump this memory in second kernel leads to hang/crash.
-        * If gart aperture is present, one needs to exclude that region
-        * and that could lead to need of extra phdr.
-        */
-       walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
-                               ced, get_gart_ranges_callback);
-
-       /*
-        * If we have gart region, excluding that could potentially split
-        * a memory range, resulting in extra header. Account for  that.
-        */
-       if (ced->gart_end)
-               ced->max_nr_ranges++;
-
        /* Exclusion of crash region could split memory ranges */
        ced->max_nr_ranges++;
 
@@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
                        return ret;
        }
 
-       /* Exclude GART region */
-       if (ced->gart_end) {
-               ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
-               if (ret)
-                       return ret;
-       }
-
        return ret;
 }
 
@@ -599,12 +564,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
        /* Add ACPI tables */
        cmd.type = E820_ACPI;
        flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-       walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+       walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
                       memmap_entry_callback);
 
        /* Add ACPI Non-volatile Storage */
        cmd.type = E820_NVS;
-       walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+       walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
                        memmap_entry_callback);
 
        /* Add crashk_low_res region */
index 569c1e4f96feb897956a64e35bd8fccb07dca6c6..837365f1091228448c19a871686c974ac00278d8 100644 (file)
@@ -925,6 +925,41 @@ static const char *e820_type_to_string(int e820_type)
        }
 }
 
+static unsigned long e820_type_to_iomem_type(int e820_type)
+{
+       switch (e820_type) {
+       case E820_RESERVED_KERN:
+       case E820_RAM:
+               return IORESOURCE_SYSTEM_RAM;
+       case E820_ACPI:
+       case E820_NVS:
+       case E820_UNUSABLE:
+       case E820_PRAM:
+       case E820_PMEM:
+       default:
+               return IORESOURCE_MEM;
+       }
+}
+
+static unsigned long e820_type_to_iores_desc(int e820_type)
+{
+       switch (e820_type) {
+       case E820_ACPI:
+               return IORES_DESC_ACPI_TABLES;
+       case E820_NVS:
+               return IORES_DESC_ACPI_NV_STORAGE;
+       case E820_PMEM:
+               return IORES_DESC_PERSISTENT_MEMORY;
+       case E820_PRAM:
+               return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
+       case E820_RESERVED_KERN:
+       case E820_RAM:
+       case E820_UNUSABLE:
+       default:
+               return IORES_DESC_NONE;
+       }
+}
+
 static bool do_mark_busy(u32 type, struct resource *res)
 {
        /* this is the legacy bios/dos rom-shadow + mmio region */
@@ -967,7 +1002,8 @@ void __init e820_reserve_resources(void)
                res->start = e820.map[i].addr;
                res->end = end;
 
-               res->flags = IORESOURCE_MEM;
+               res->flags = e820_type_to_iomem_type(e820.map[i].type);
+               res->desc = e820_type_to_iores_desc(e820.map[i].type);
 
                /*
                 * don't register the region that could be conflicted with
index d25097c3fc1d1af8af35c156f05121f9f4d46a94..d5804adfa6da6c3d495b124faf47c288e2c0c6cf 100644 (file)
@@ -409,8 +409,10 @@ static inline void copy_init_fpstate_to_fpregs(void)
 {
        if (use_xsave())
                copy_kernel_to_xregs(&init_fpstate.xsave, -1);
-       else
+       else if (static_cpu_has(X86_FEATURE_FXSR))
                copy_kernel_to_fxregs(&init_fpstate.fxsave);
+       else
+               copy_kernel_to_fregs(&init_fpstate.fsave);
 }
 
 /*
index 6d9f0a7ef4c8e5da3d596444242de7478d7b9356..bd08fb77073d833ec8f1e27bc67d20fa37b559dc 100644 (file)
@@ -78,13 +78,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
        cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
        write_cr0(cr0);
 
-       asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
-                    : "+m" (fsw), "+m" (fcw));
+       if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+               asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+                            : "+m" (fsw), "+m" (fcw));
 
-       if (fsw == 0 && (fcw & 0x103f) == 0x003f)
-               set_cpu_cap(c, X86_FEATURE_FPU);
-       else
-               clear_cpu_cap(c, X86_FEATURE_FPU);
+               if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+                       set_cpu_cap(c, X86_FEATURE_FPU);
+               else
+                       clear_cpu_cap(c, X86_FEATURE_FPU);
+       }
 
 #ifndef CONFIG_MATH_EMULATION
        if (!cpu_has_fpu) {
@@ -132,7 +134,7 @@ static void __init fpu__init_system_generic(void)
         * Set up the legacy init FPU context. (xstate init might overwrite this
         * with a more modern format, if the CPU supports it.)
         */
-       fpstate_init_fxstate(&init_fpstate.fxsave);
+       fpstate_init(&init_fpstate);
 
        fpu__init_system_mxcsr();
 }
@@ -300,12 +302,6 @@ u64 __init fpu__get_supported_xfeatures_mask(void)
 static void __init fpu__clear_eager_fpu_features(void)
 {
        setup_clear_cpu_cap(X86_FEATURE_MPX);
-       setup_clear_cpu_cap(X86_FEATURE_AVX);
-       setup_clear_cpu_cap(X86_FEATURE_AVX2);
-       setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-       setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-       setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-       setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
 }
 
 /*
index 29408d6d66267c12ce4eab826e38bac3d8367d17..05c9e3f5b6d7b601286d62ef7f4fe1a754033255 100644 (file)
@@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end)
 static unsigned long text_ip_addr(unsigned long ip)
 {
        /*
-        * On x86_64, kernel text mappings are mapped read-only with
-        * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
-        * of the kernel text mapping to modify the kernel text.
+        * On x86_64, kernel text mappings are mapped read-only, so we use
+        * the kernel identity mapping instead of the kernel text mapping
+        * to modify the kernel text.
         *
         * For 32bit kernels, these mappings are same and we can use
         * kernel identity mapping to modify code.
index 44256a62702b2c51077fc0b8b82a904ed122b9f6..ed15cd486d06347626c080a0081e3eed01ac9128 100644 (file)
@@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
        int err;
-#ifdef CONFIG_DEBUG_RODATA
        char opc[BREAK_INSTR_SIZE];
-#endif /* CONFIG_DEBUG_RODATA */
 
        bpt->type = BP_BREAKPOINT;
        err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
                return err;
        err = probe_kernel_write((char *)bpt->bpt_addr,
                                 arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
-#ifdef CONFIG_DEBUG_RODATA
        if (!err)
                return err;
        /*
@@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
        if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
                return -EINVAL;
        bpt->type = BP_POKE_BREAKPOINT;
-#endif /* CONFIG_DEBUG_RODATA */
+
        return err;
 }
 
 int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
 {
-#ifdef CONFIG_DEBUG_RODATA
        int err;
        char opc[BREAK_INSTR_SIZE];
 
@@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
        if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
                goto knl_write;
        return err;
+
 knl_write:
-#endif /* CONFIG_DEBUG_RODATA */
        return probe_kernel_write((char *)bpt->bpt_addr,
                                  (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
 }
index 30ca7607cbbbbcae4793aa5c14d8f73bbd784d71..97340f2c437c64def7a83f75a8f9b0bc6a968451 100644 (file)
@@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
        processor.cpuflag = CPU_ENABLED;
        processor.cpufeature = (boot_cpu_data.x86 << 8) |
            (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-       processor.featureflag = boot_cpu_data.x86_capability[0];
+       processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
        processor.reserved[0] = 0;
        processor.reserved[1] = 0;
        for (i = 0; i < 2; i++) {
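
Several hunks in this section (here, and in the lguest and Xen changes further down) replace the bare x86_capability[0] index with the named CPUID_1_EDX word. Purely for reference -- this is the architectural CPUID leaf, not an interface added by this patch -- the same word can be inspected from user space:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return 1;

		/* bit 0: FPU, bit 4: TSC, bit 5: MSR */
		printf("CPUID.1:EDX = 0x%08x (FPU=%u TSC=%u MSR=%u)\n",
		       edx, edx & 1, (edx >> 4) & 1, (edx >> 5) & 1);
		return 0;
	}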
index 8a2cdd736fa4da82374fa9392e76b5716cc0f89a..04b132a767f116e8ff35efcbbc036e331a145a4b 100644 (file)
@@ -30,6 +30,7 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
+#include <asm/cache.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
@@ -69,7 +70,7 @@ struct nmi_stats {
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
 
-static int ignore_nmis;
+static int ignore_nmis __read_mostly;
 
 int unknown_nmi_panic;
 /*
index 14415aff18136524cfa3d4c9b07bdde6ee0d6f54..92f70147a9a673e74b3753637adbb08e2a4869fa 100644 (file)
@@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data)
 
 static __init int register_e820_pmem(void)
 {
-       char *pmem = "Persistent Memory (legacy)";
        struct platform_device *pdev;
        int rc;
 
-       rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+       rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
+                                IORESOURCE_MEM, 0, -1, NULL, found);
        if (rc <= 0)
                return 0;
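
Together with the new res->desc assignment in the e820 hunk above, these walker conversions let callers look regions up by descriptor instead of matching the resource name string. A hedged kernel-context sketch of another such consumer, using the same walker and callback signature visible in this patch (count_legacy_pmem_regions() and pmem_region() are made-up names for illustration):

	#include <linux/ioport.h>
	#include <linux/kernel.h>

	static int pmem_region(u64 start, u64 end, void *arg)
	{
		unsigned int *count = arg;

		pr_info("legacy pmem region: %#llx-%#llx\n", start, end);
		(*count)++;
		return 0;		/* keep walking */
	}

	static unsigned int count_legacy_pmem_regions(void)
	{
		unsigned int count = 0;

		walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY,
				    IORESOURCE_MEM, 0, -1, &count, pmem_region);
		return count;
	}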
 
index 9f7c21c22477e59462d72e930d79a4c2a238a051..9decee2bfdbeedb795d3e1f423e606cdec5e49e0 100644 (file)
@@ -418,9 +418,9 @@ static void mwait_idle(void)
        if (!current_set_polling_and_test()) {
                trace_cpu_idle_rcuidle(1, smp_processor_id());
                if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
-                       smp_mb(); /* quirk */
+                       mb(); /* quirk */
                        clflush((void *)&current_thread_info()->flags);
-                       smp_mb(); /* quirk */
+                       mb(); /* quirk */
                }
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
index d3d80e6d42a20ea665325b4cf2a5e7be3c43289a..aa52c10094755e2fa9fdd5ac7a39b0c36effac8a 100644 (file)
@@ -152,21 +152,21 @@ static struct resource data_resource = {
        .name   = "Kernel data",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource code_resource = {
        .name   = "Kernel code",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 static struct resource bss_resource = {
        .name   = "Kernel bss",
        .start  = 0,
        .end    = 0,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags  = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
 };
 
 
index 24d57f77b3c19615840ac4f09c8c0fd299864698..3bf1e0b5f827ae43dca2b2c0c3c4040e56d15c56 100644 (file)
@@ -97,6 +97,14 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+/* Logical package management. We might want to allocate that dynamically */
+static int *physical_to_logical_pkg __read_mostly;
+static unsigned long *physical_package_map __read_mostly;
+static unsigned long *logical_package_map  __read_mostly;
+static unsigned int max_physical_pkg_id __read_mostly;
+unsigned int __max_logical_packages __read_mostly;
+EXPORT_SYMBOL(__max_logical_packages);
+
 static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 {
        unsigned long flags;
@@ -251,6 +259,97 @@ static void notrace start_secondary(void *unused)
        cpu_startup_entry(CPUHP_ONLINE);
 }
 
+int topology_update_package_map(unsigned int apicid, unsigned int cpu)
+{
+       unsigned int new, pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+       /* Called from early boot ? */
+       if (!physical_package_map)
+               return 0;
+
+       if (pkg >= max_physical_pkg_id)
+               return -EINVAL;
+
+       /* Set the logical package id */
+       if (test_and_set_bit(pkg, physical_package_map))
+               goto found;
+
+       if (pkg < __max_logical_packages) {
+               set_bit(pkg, logical_package_map);
+               physical_to_logical_pkg[pkg] = pkg;
+               goto found;
+       }
+       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
+       if (new >= __max_logical_packages) {
+               physical_to_logical_pkg[pkg] = -1;
+               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+                       apicid, pkg);
+               return -ENOSPC;
+       }
+       set_bit(new, logical_package_map);
+       pr_info("APIC(%x) Converting physical %u to logical package %u\n",
+               apicid, pkg, new);
+       physical_to_logical_pkg[pkg] = new;
+
+found:
+       cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg];
+       return 0;
+}
+
+/**
+ * topology_phys_to_logical_pkg - Map a physical package id to a logical
+ *
+ * Returns logical package id or -1 if not found
+ */
+int topology_phys_to_logical_pkg(unsigned int phys_pkg)
+{
+       if (phys_pkg >= max_physical_pkg_id)
+               return -1;
+       return physical_to_logical_pkg[phys_pkg];
+}
+EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
+static void __init smp_init_package_map(void)
+{
+       unsigned int ncpus, cpu;
+       size_t size;
+
+       /*
+        * Today neither Intel nor AMD supports heterogeneous systems. That
+        * might change in the future....
+        */
+       ncpus = boot_cpu_data.x86_max_cores * smp_num_siblings;
+       __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
+
+       /*
+        * Possibly larger than what we need as the number of apic ids per
+        * package can be smaller than the actual used apic ids.
+        */
+       max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus);
+       size = max_physical_pkg_id * sizeof(unsigned int);
+       physical_to_logical_pkg = kmalloc(size, GFP_KERNEL);
+       memset(physical_to_logical_pkg, 0xff, size);
+       size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
+       physical_package_map = kzalloc(size, GFP_KERNEL);
+       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
+       logical_package_map = kzalloc(size, GFP_KERNEL);
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
+
+       for_each_present_cpu(cpu) {
+               unsigned int apicid = apic->cpu_present_to_apicid(cpu);
+
+               if (apicid == BAD_APICID || !apic->apic_id_valid(apicid))
+                       continue;
+               if (!topology_update_package_map(apicid, cpu))
+                       continue;
+               pr_warn("CPU %u APICId %x disabled\n", cpu, apicid);
+               per_cpu(x86_bios_cpu_apicid, cpu) = BAD_APICID;
+               set_cpu_possible(cpu, false);
+               set_cpu_present(cpu, false);
+       }
+}
+
 void __init smp_store_boot_cpu_info(void)
 {
        int id = 0; /* CPU 0 */
@@ -258,6 +357,7 @@ void __init smp_store_boot_cpu_info(void)
 
        *c = boot_cpu_data;
        c->cpu_index = id;
+       smp_init_package_map();
 }
 
 /*
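
The new package-map code above turns sparse physical package ids into dense logical ids so that per-package data (for users such as uncore perf drivers) can be sized by package count rather than by APIC id space. A simplified user-space model of the allocation policy -- it drops the "prefer logical == physical" fast path and the kernel bitmap helpers, so it is an illustration of the idea, not the kernel algorithm:

	#include <stdio.h>
	#include <string.h>

	#define MAX_PHYS_PKG	64	/* stand-in for max_physical_pkg_id   */
	#define MAX_LOGICAL_PKG	4	/* stand-in for __max_logical_packages */

	static int phys_to_logical[MAX_PHYS_PKG];
	static unsigned char phys_seen[MAX_PHYS_PKG];
	static unsigned char logical_used[MAX_LOGICAL_PKG];

	static int update_package_map(unsigned int pkg)
	{
		unsigned int new;

		if (pkg >= MAX_PHYS_PKG)
			return -1;
		if (phys_seen[pkg])		/* already mapped */
			return phys_to_logical[pkg];
		phys_seen[pkg] = 1;

		for (new = 0; new < MAX_LOGICAL_PKG; new++) {
			if (!logical_used[new]) {
				logical_used[new] = 1;
				phys_to_logical[pkg] = new;
				return new;
			}
		}
		return -1;	/* exceeds the logical package map */
	}

	int main(void)
	{
		unsigned int sample[] = { 0, 3, 3, 7, 0, 12 };	/* sparse physical ids */
		unsigned int i;

		memset(phys_to_logical, 0xff, sizeof(phys_to_logical));
		for (i = 0; i < sizeof(sample) / sizeof(sample[0]); i++)
			printf("physical package %2u -> logical %d\n",
			       sample[i], update_package_map(sample[i]));
		return 0;
	}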
index 3f92ce07e525fd41167a64f9d7c4df1bfbe673bf..27538f183c3b15d9d59e225f2612b12660ee979b 100644 (file)
@@ -142,7 +142,6 @@ static int test_NX(void)
         * by the error message
         */
 
-#ifdef CONFIG_DEBUG_RODATA
        /* Test 3: Check if the .rodata section is executable */
        if (rodata_test_data != 0xC3) {
                printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
@@ -151,7 +150,6 @@ static int test_NX(void)
                printk(KERN_ERR "test_nx: .rodata section is executable\n");
                ret = -ENODEV;
        }
-#endif
 
 #if 0
        /* Test 4: Check if the .data section of a module is executable */
index 5ecbfe5099dad68e140b85ffdaefc75818afc6d9..cb4a01b41e277887b6769e4a17d60cb4139f9864 100644 (file)
@@ -76,5 +76,5 @@ int rodata_test(void)
 }
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
+MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
 MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
index 74e4bf11f562e0354c227518421e2375ec16fafa..fe133b710befa9a7495178374076e465ec61668f 100644 (file)
@@ -41,29 +41,28 @@ ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
 /*
- * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
- * we retain large page mappings for boundaries spanning kernel text, rodata
- * and data sections.
+ * On 64-bit, align RODATA to 2MB so we retain large page mappings for
+ * boundaries spanning kernel text, rodata and data sections.
  *
  * However, kernel identity mappings will have different RWX permissions
  * to the pages mapping to text and to the pages padding (which are freed) the
  * text section. Hence kernel identity mappings will be broken to smaller
  * pages. For 64-bit, kernel text and kernel identity mappings are different,
- * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
- * as well as retain 2MB large page mappings for kernel text.
+ * so we can enable protection checks as well as retain 2MB large page
+ * mappings for kernel text.
  */
-#define X64_ALIGN_DEBUG_RODATA_BEGIN   . = ALIGN(HPAGE_SIZE);
+#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
 
-#define X64_ALIGN_DEBUG_RODATA_END                             \
+#define X64_ALIGN_RODATA_END                                   \
                . = ALIGN(HPAGE_SIZE);                          \
                __end_rodata_hpage_align = .;
 
 #else
 
-#define X64_ALIGN_DEBUG_RODATA_BEGIN
-#define X64_ALIGN_DEBUG_RODATA_END
+#define X64_ALIGN_RODATA_BEGIN
+#define X64_ALIGN_RODATA_END
 
 #endif
 
@@ -112,13 +111,11 @@ SECTIONS
 
        EXCEPTION_TABLE(16) :text = 0x9090
 
-#if defined(CONFIG_DEBUG_RODATA)
        /* .text should occupy whole number of pages */
        . = ALIGN(PAGE_SIZE);
-#endif
-       X64_ALIGN_DEBUG_RODATA_BEGIN
+       X64_ALIGN_RODATA_BEGIN
        RO_DATA(PAGE_SIZE)
-       X64_ALIGN_DEBUG_RODATA_END
+       X64_ALIGN_RODATA_END
 
        /* Data */
        .data : AT(ADDR(.data) - LOAD_OFFSET) {
index 95a955de5964bcc3f4aa6791a004e29b28504c13..1e7a49bfc94fb323cbb11782693cab89743f1133 100644 (file)
@@ -3721,13 +3721,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
+       bool uses_nx = context->nx || context->base_role.smep_andnot_wp;
+
        /*
         * Passing "true" to the last argument is okay; it adds a check
         * on bit 8 of the SPTEs which KVM doesn't use anyway.
         */
        __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
                                boot_cpu_data.x86_phys_bits,
-                               context->shadow_root_level, context->nx,
+                               context->shadow_root_level, uses_nx,
                                guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
                                true);
 }
index 0ff453749a909b9f0140b5de5d5d2dbb133e99d9..9bd8f44baded2318e8b5bc2a4773068a45a8723b 100644 (file)
@@ -1813,6 +1813,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                        return;
                }
                break;
+       case MSR_IA32_PEBS_ENABLE:
+               /* PEBS needs a quiescent period after being disabled (to write
+                * a record).  Disabling PEBS through VMX MSR swapping doesn't
+                * provide that period, so a CPU could write host's record into
+                * guest's memory.
+                */
+               wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
        }
 
        for (i = 0; i < m->nr; ++i)
@@ -1850,26 +1857,31 @@ static void reload_tss(void)
 
 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 {
-       u64 guest_efer;
-       u64 ignore_bits;
+       u64 guest_efer = vmx->vcpu.arch.efer;
+       u64 ignore_bits = 0;
 
-       guest_efer = vmx->vcpu.arch.efer;
+       if (!enable_ept) {
+               /*
+                * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+                * host CPUID is more efficient than testing guest CPUID
+                * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+                */
+               if (boot_cpu_has(X86_FEATURE_SMEP))
+                       guest_efer |= EFER_NX;
+               else if (!(guest_efer & EFER_NX))
+                       ignore_bits |= EFER_NX;
+       }
 
        /*
-        * NX is emulated; LMA and LME handled by hardware; SCE meaningless
-        * outside long mode
+        * LMA and LME handled by hardware; SCE meaningless outside long mode.
         */
-       ignore_bits = EFER_NX | EFER_SCE;
+       ignore_bits |= EFER_SCE;
 #ifdef CONFIG_X86_64
        ignore_bits |= EFER_LMA | EFER_LME;
        /* SCE is meaningful only in long mode on Intel */
        if (guest_efer & EFER_LMA)
                ignore_bits &= ~(u64)EFER_SCE;
 #endif
-       guest_efer &= ~ignore_bits;
-       guest_efer |= host_efer & ignore_bits;
-       vmx->guest_msrs[efer_offset].data = guest_efer;
-       vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
        clear_atomic_switch_msr(vmx, MSR_EFER);
 
@@ -1880,16 +1892,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         */
        if (cpu_has_load_ia32_efer ||
            (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
-               guest_efer = vmx->vcpu.arch.efer;
                if (!(guest_efer & EFER_LMA))
                        guest_efer &= ~EFER_LME;
                if (guest_efer != host_efer)
                        add_atomic_switch_msr(vmx, MSR_EFER,
                                              guest_efer, host_efer);
                return false;
-       }
+       } else {
+               guest_efer &= ~ignore_bits;
+               guest_efer |= host_efer & ignore_bits;
 
-       return true;
+               vmx->guest_msrs[efer_offset].data = guest_efer;
+               vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
+
+               return true;
+       }
 }
 
 static unsigned long segment_base(u16 selector)
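
When the MSR autoload path is not taken, the reworked update_transition_efer() falls back to the guest_msrs[] slot and splices host values into the bits it chose to ignore. A stand-alone illustration of that splice (the EFER bit positions are architectural; the sample host/guest values are made up):

	#include <stdint.h>
	#include <stdio.h>

	#define EFER_SCE	(1ULL << 0)	/* syscall enable   */
	#define EFER_LME	(1ULL << 8)	/* long mode enable */
	#define EFER_LMA	(1ULL << 10)	/* long mode active */
	#define EFER_NX		(1ULL << 11)	/* no-execute       */

	int main(void)
	{
		uint64_t host_efer   = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
		uint64_t guest_efer  = EFER_NX;			/* sample guest value */
		uint64_t ignore_bits = EFER_SCE | EFER_LMA | EFER_LME;

		/* ignored bits track the host; everything else tracks the guest */
		uint64_t loaded = (guest_efer & ~ignore_bits) |
				  (host_efer  &  ignore_bits);

		printf("EFER written to the guest MSR slot: 0x%llx\n",
		       (unsigned long long)loaded);
		return 0;
	}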
index 4ba229ac3f4ff127dddf0c33fb91cc42f2e60b22..fd57d3ae7e16daf24f8cfdb4e708c5e4e9a9f3f9 100644 (file)
@@ -1520,12 +1520,6 @@ __init void lguest_init(void)
         */
        reserve_top_address(lguest_data.reserve_mem);
 
-       /*
-        * If we don't initialize the lock dependency checker now, it crashes
-        * atomic_notifier_chain_register, then paravirt_disable_iospace.
-        */
-       lockdep_init();
-
        /* Hook in our special panic hypercall code. */
        atomic_notifier_chain_register(&panic_notifier_list, &paniced);
 
@@ -1535,7 +1529,7 @@ __init void lguest_init(void)
         */
        cpu_detect(&new_cpu_data);
        /* head.S usually sets up the first capability word, so do it here. */
-       new_cpu_data.x86_capability[0] = cpuid_edx(1);
+       new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 
        /* Math is always hard! */
        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
index e912b2f6d36e5392b33bf6749f19a53cff6d7a06..2f07c291dcc855af5c483c55133d91a4dff21c92 100644 (file)
@@ -102,7 +102,7 @@ static void delay_mwaitx(unsigned long __loops)
                 * Use cpu_tss as a cacheline-aligned, seldomly
                 * accessed per-cpu variable as the monitor target.
                 */
-               __monitorx(this_cpu_ptr(&cpu_tss), 0, 0);
+               __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
 
                /*
                 * AMD, like Intel, supports the EAX hint and EAX=0xf
index cb4ef3de61f9ae9c95249876965a71a5d7b58cf8..2ebfbaf611424be1937c4e2288776d50d1c8ff9e 100644 (file)
@@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void)
        return flag;
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -960,5 +959,3 @@ void mark_rodata_ro(void)
        if (__supported_pte_mask & _PAGE_NX)
                debug_checkwx();
 }
-#endif
-
index 5488d21123bd2edb3f71ded119495474f3e8b728..a40b755c67e31280ded6fe10d81a18927523059c 100644 (file)
@@ -1074,7 +1074,6 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
@@ -1166,8 +1165,6 @@ void mark_rodata_ro(void)
        debug_checkwx();
 }
 
-#endif
-
 int kern_addr_valid(unsigned long addr)
 {
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
index 9cf96d82147a8da38ce4f57413d75075003cb244..1c37e650acacff422460b1d9b705a867e01a09a5 100644 (file)
@@ -283,7 +283,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
                   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_RW;
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64)
        /*
         * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
         * kernel text mappings for the large page aligned text, rodata sections
index 2d66db8f80f992d3b0609373502031aacf91f161..ed30e79347e86377198009dcafb1c0b26a9865df 100644 (file)
@@ -130,6 +130,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 }
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
+/*
+ * Helper function for efi_reserve_boot_services() to figure out if we
+ * can free regions in efi_free_boot_services().
+ *
+ * Use this function to ensure we do not free regions owned by somebody
+ * else. We must only reserve (and then free) regions:
+ *
+ * - Not within any part of the kernel
+ * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc)
+ */
+static bool can_free_region(u64 start, u64 size)
+{
+       if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
+               return false;
+
+       if (!e820_all_mapped(start, start+size, E820_RAM))
+               return false;
+
+       return true;
+}
+
 /*
  * The UEFI specification makes it clear that the operating system is free to do
  * whatever it wants with boot services code after ExitBootServices() has been
@@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void)
                efi_memory_desc_t *md = p;
                u64 start = md->phys_addr;
                u64 size = md->num_pages << EFI_PAGE_SHIFT;
+               bool already_reserved;
 
                if (md->type != EFI_BOOT_SERVICES_CODE &&
                    md->type != EFI_BOOT_SERVICES_DATA)
                        continue;
-               /* Only reserve where possible:
-                * - Not within any already allocated areas
-                * - Not over any memory area (really needed, if above?)
-                * - Not within any part of the kernel
-                * - Not the bios reserved area
-               */
-               if ((start + size > __pa_symbol(_text)
-                               && start <= __pa_symbol(_end)) ||
-                       !e820_all_mapped(start, start+size, E820_RAM) ||
-                       memblock_is_region_reserved(start, size)) {
-                       /* Could not reserve, skip it */
-                       md->num_pages = 0;
-                       memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n",
-                                    start, start+size-1);
-               } else
+
+               already_reserved = memblock_is_region_reserved(start, size);
+
+               /*
+                * Because the following memblock_reserve() is paired
+                * with free_bootmem_late() for this region in
+                * efi_free_boot_services(), we must be extremely
+                * careful not to reserve, and subsequently free,
+                * critical regions of memory (like the kernel image) or
+                * those regions that somebody else has already
+                * reserved.
+                *
+                * A good example of a critical region that must not be
+                * freed is page zero (first 4KB of memory), which may
+                * contain boot services code/data but is marked
+                * E820_RESERVED by trim_bios_range().
+                */
+               if (!already_reserved) {
                        memblock_reserve(start, size);
+
+                       /*
+                        * If we are the first to reserve the region, no
+                        * one else cares about it. We own it and can
+                        * free it later.
+                        */
+                       if (can_free_region(start, size))
+                               continue;
+               }
+
+               /*
+                * We don't own the region. We must not free it.
+                *
+                * Setting this bit for a boot services region really
+                * doesn't make sense as far as the firmware is
+                * concerned, but it does provide us with a way to tag
+                * those regions that must not be paired with
+                * free_bootmem_late().
+                */
+               md->attribute |= EFI_MEMORY_RUNTIME;
        }
 }
 
@@ -183,8 +228,8 @@ void __init efi_free_boot_services(void)
                    md->type != EFI_BOOT_SERVICES_DATA)
                        continue;
 
-               /* Could not reserve boot area */
-               if (!size)
+               /* Do not free, someone else owns it: */
+               if (md->attribute & EFI_MEMORY_RUNTIME)
                        continue;
 
                free_bootmem_late(start, size);
index d09e4c9d7cc5b4044c4421bde8692aaa6f8b21be..2c261082eadf82f693d062e7f600660fcd56817f 100644 (file)
@@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        cpu_detect(&new_cpu_data);
        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
        new_cpu_data.wp_works_ok = 1;
-       new_cpu_data.x86_capability[0] = cpuid_edx(1);
+       new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
 #endif
 
        if (xen_start_info->mod_start) {
index 724a08740a04b4a255ca080c103ec3b7381bc4f7..9466354d3e4962f14cdae33b09378a73f9c51a5d 100644 (file)
@@ -11,7 +11,7 @@
 #include "pmu.h"
 
 /* x86_pmu.handle_irq definition */
-#include "../kernel/cpu/perf_event.h"
+#include "../events/perf_event.h"
 
 #define XENPMU_IRQ_PROCESSING    1
 struct xenpmu {
index 296b7a14893aabba895c578e8671e36b34875a37..b6f7fa3a1d403ea95428808f49edab475422e80a 100644 (file)
@@ -62,7 +62,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
        if (count < 0) {
                return NULL;
        } else if (count > 0) {
-               resources = kmalloc(count * sizeof(struct resource),
+               resources = kzalloc(count * sizeof(struct resource),
                                    GFP_KERNEL);
                if (!resources) {
                        dev_err(&adev->dev, "No memory for resources\n");
index 305218539df28cda17b97992ff44a0dbaeb613de..d48cbed342c1dda500d5020c746a9caf1c72ad09 100644 (file)
@@ -269,8 +269,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state,
         */
        if (ACPI_SUCCESS(status) &&
            possible_method_call && (node->type == ACPI_TYPE_METHOD)) {
-               if (GET_CURRENT_ARG_TYPE(walk_state->arg_types) ==
-                   ARGP_SUPERNAME) {
+               if (walk_state->opcode == AML_UNLOAD_OP) {
                        /*
                         * acpi_ps_get_next_namestring has increased the AML pointer,
                         * so we need to restore the saved AML pointer for method call.
@@ -697,7 +696,7 @@ static union acpi_parse_object *acpi_ps_get_next_field(struct acpi_parse_state
  *
  * PARAMETERS:  walk_state          - Current state
  *              parser_state        - Current parser state object
- *              arg_type            - The parser argument type (ARGP_*)
+ *              arg_type            - The argument type (AML_*_ARG)
  *              return_arg          - Where the next arg is returned
  *
  * RETURN:      Status, and an op object containing the next argument.
@@ -817,9 +816,9 @@ acpi_ps_get_next_arg(struct acpi_walk_state *walk_state,
                                return_ACPI_STATUS(AE_NO_MEMORY);
                        }
 
-                       /* super_name allows argument to be a method call */
+                       /* To support super_name arg of Unload */
 
-                       if (arg_type == ARGP_SUPERNAME) {
+                       if (walk_state->opcode == AML_UNLOAD_OP) {
                                status =
                                    acpi_ps_get_next_namepath(walk_state,
                                                              parser_state, arg,
index 0431883653bed9d3c0394368fe87ced99043f185..559c1173de1c99177ba702c60d27993826510e9c 100644 (file)
@@ -519,7 +519,7 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
                             u64 param3, u64 param4)
 {
        int rc;
-       unsigned long pfn;
+       u64 base_addr, size;
 
        /* If user manually set "flags", make sure it is legal */
        if (flags && (flags &
@@ -545,10 +545,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
        /*
         * Disallow crazy address masks that give BIOS leeway to pick
         * injection address almost anywhere. Insist on page or
-        * better granularity and that target address is normal RAM.
+        * better granularity and that target address is normal RAM or
+        * NVDIMM.
         */
-       pfn = PFN_DOWN(param1 & param2);
-       if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK))
+       base_addr = param1 & param2;
+       size = ~param2 + 1;
+
+       if (((param2 & PAGE_MASK) != PAGE_MASK) ||
+           ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE)
+                               != REGION_INTERSECTS) &&
+            (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY)
+                               != REGION_INTERSECTS)))
                return -EINVAL;
 
 inject:
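The mask arithmetic above (base_addr = param1 & param2, size = ~param2 + 1) relies on the mask being a run of high bits: its two's complement is then exactly the byte length of the addressed window. A small stand-alone illustration, with made-up parameter values:

/* mask_demo.c - base/size derivation from an address mask */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t param1 = 0x123456789abcULL;    /* requested address  */
        uint64_t param2 = ~0xfffULL;            /* 4KiB-granular mask */

        uint64_t base = param1 & param2;        /* aligned base        */
        uint64_t size = ~param2 + 1;            /* mask -> byte length */

        printf("base=%#llx size=%#llx\n",
               (unsigned long long)base, (unsigned long long)size);
        /* prints: base=0x123456789000 size=0x1000 */
        return 0;
}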
index c359351d50f1c99e9758b74ea84d97e76b402b0f..a163f2c59aa36571359e75661d52cfdc55d4ef3b 100644 (file)
@@ -218,7 +218,7 @@ bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname)
        bool ret;
 
        ret = __fwnode_property_present(fwnode, propname);
-       if (ret == false && fwnode && fwnode->secondary)
+       if (ret == false && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
                ret = __fwnode_property_present(fwnode->secondary, propname);
        return ret;
 }
@@ -423,7 +423,7 @@ EXPORT_SYMBOL_GPL(device_property_match_string);
        int _ret_;                                                                      \
        _ret_ = FWNODE_PROP_READ(_fwnode_, _propname_, _type_, _proptype_,              \
                                 _val_, _nval_);                                        \
-       if (_ret_ == -EINVAL && _fwnode_ && _fwnode_->secondary)                        \
+       if (_ret_ == -EINVAL && _fwnode_ && !IS_ERR_OR_NULL(_fwnode_->secondary))       \
                _ret_ = FWNODE_PROP_READ(_fwnode_->secondary, _propname_, _type_,       \
                                _proptype_, _val_, _nval_);                             \
        _ret_;                                                                          \
@@ -593,7 +593,7 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode,
        int ret;
 
        ret = __fwnode_property_read_string_array(fwnode, propname, val, nval);
-       if (ret == -EINVAL && fwnode && fwnode->secondary)
+       if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
                ret = __fwnode_property_read_string_array(fwnode->secondary,
                                                          propname, val, nval);
        return ret;
@@ -621,7 +621,7 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode,
        int ret;
 
        ret = __fwnode_property_read_string(fwnode, propname, val);
-       if (ret == -EINVAL && fwnode && fwnode->secondary)
+       if (ret == -EINVAL && fwnode && !IS_ERR_OR_NULL(fwnode->secondary))
                ret = __fwnode_property_read_string(fwnode->secondary,
                                                    propname, val);
        return ret;
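The pattern repeated in the hunks above is: try the primary fwnode, and fall back to the secondary only when the primary returns -EINVAL and the secondary is a usable pointer (IS_ERR_OR_NULL() also rejects error pointers, which the old bare pointer check missed). A simplified stand-alone sketch of that fallback, with node_read() standing in for the driver-core helpers:

/* fallback.c - primary/secondary lookup fallback, illustrative only */
#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

struct node {
        int value;
        bool has_value;
        struct node *secondary; /* may be NULL; in the kernel it may also be
                                 * an ERR_PTR, hence IS_ERR_OR_NULL()       */
};

static int node_read(const struct node *n, int *out)
{
        if (!n || !n->has_value)
                return -EINVAL;
        *out = n->value;
        return 0;
}

/* Read from the primary; on -EINVAL retry on a valid secondary only. */
static int node_read_with_fallback(const struct node *n, int *out)
{
        int ret = node_read(n, out);

        if (ret == -EINVAL && n && n->secondary)
                ret = node_read(n->secondary, out);
        return ret;
}

int main(void)
{
        struct node sec = { .value = 42, .has_value = true };
        struct node pri = { .has_value = false, .secondary = &sec };
        int v;

        if (!node_read_with_fallback(&pri, &v))
                printf("read %d from secondary\n", v);
        return 0;
}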
index 64f5d1bdbb48defdddc4c7ed7ee0c5b988b92e1b..8e304b1befc56385b8fc34d7697a529289d8ae3b 100644 (file)
 #define AT_XDMAC_MAX_CHAN      0x20
 #define AT_XDMAC_MAX_CSIZE     16      /* 16 data */
 #define AT_XDMAC_MAX_DWIDTH    8       /* 64 bits */
+#define AT_XDMAC_RESIDUE_MAX_RETRIES   5
 
 #define AT_XDMAC_DMA_BUSWIDTHS\
        (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
@@ -1395,8 +1396,8 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
        struct at_xdmac_desc    *desc, *_desc;
        struct list_head        *descs_list;
        enum dma_status         ret;
-       int                     residue;
-       u32                     cur_nda, mask, value;
+       int                     residue, retry;
+       u32                     cur_nda, check_nda, cur_ubc, mask, value;
        u8                      dwidth = 0;
        unsigned long           flags;
 
@@ -1433,7 +1434,42 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                        cpu_relax();
        }
 
+       /*
+        * When computing the residue we need to read two registers, but we
+        * cannot do so atomically. AT_XDMAC_CNDA tells us where we stand in
+        * the descriptor list and AT_XDMAC_CUBC tells us how much data
+        * remains for the current descriptor. Since the DMA channel is not
+        * paused (to avoid losing data), the current descriptor may change
+        * between the AT_XDMAC_CNDA and AT_XDMAC_CUBC reads.
+        * For that reason, after reading AT_XDMAC_CUBC we re-read
+        * AT_XDMAC_CNDA to check that we are still on the same descriptor.
+        * If AT_XDMAC_CNDA has changed, AT_XDMAC_CUBC must be read again.
+        * Memory barriers are used to enforce the read ordering of the
+        * registers. A maximum number of retries is set because, although
+        * unlikely, the loop could otherwise never end when transferring a
+        * lot of data with small buffers.
+        */
        cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+       rmb();
+       cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+       for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+               rmb();
+               check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+
+               if (likely(cur_nda == check_nda))
+                       break;
+
+               cur_nda = check_nda;
+               rmb();
+               cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+       }
+
+       if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+               ret = DMA_ERROR;
+               goto spin_unlock;
+       }
+
        /*
         * Remove size of all microblocks already transferred and the current
         * one. Then add the remaining size to transfer of the current
@@ -1446,7 +1482,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
                        break;
        }
-       residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
+       residue += cur_ubc << dwidth;
 
        dma_set_residue(txstate, residue);
 
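The CNDA/CUBC sequence above is an instance of a general pattern: read register A, read register B, then re-read A until it matches the first read, bounded by a retry limit so the loop cannot spin forever. A stand-alone sketch of the idea, using C11 atomics in place of the device registers and barriers (not the driver code itself):

/* stable_pair.c - bounded-retry consistent read of two related values */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_RETRIES 5

static _Atomic unsigned int reg_a;      /* stands in for AT_XDMAC_CNDA */
static _Atomic unsigned int reg_b;      /* stands in for AT_XDMAC_CUBC */

/* Returns true and fills *a/*b when both reads belong to the same state. */
static bool read_pair(unsigned int *a, unsigned int *b)
{
        unsigned int first, check;
        int retry;

        first = atomic_load(&reg_a);
        *b = atomic_load(&reg_b);

        for (retry = 0; retry < MAX_RETRIES; retry++) {
                check = atomic_load(&reg_a);
                if (check == first) {
                        *a = first;
                        return true;    /* consistent snapshot */
                }
                first = check;          /* state moved on: retry */
                *b = atomic_load(&reg_b);
        }
        return false;                   /* give up, like DMA_ERROR above */
}

int main(void)
{
        unsigned int a, b;

        atomic_store(&reg_a, 0x1000);
        atomic_store(&reg_b, 64);
        if (read_pair(&a, &b))
                printf("a=%#x b=%u\n", a, b);
        return 0;
}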
index 2209f75fdf05bf29114f0fb3ffedff89f55b43f3..aac85c30c2cf6fc64669841c5b6abb5317df1b94 100644 (file)
@@ -522,6 +522,8 @@ static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
                        chan_dbg(chan, "LD %p callback\n", desc);
                        txd->callback(txd->callback_param);
                }
+
+               dma_descriptor_unmap(txd);
        }
 
        /* Run any dependencies */
index e4f43125e0fbbf5017cd848478eeb01550a87508..f039cfadf17b307c32bd88ef0e4a4a5ac749b590 100644 (file)
@@ -1300,10 +1300,10 @@ static int iop_adma_probe(struct platform_device *pdev)
         * note: writecombine gives slightly better performance, but
         * requires that we explicitly flush the writes
         */
-       adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
-                                                         plat_data->pool_size,
-                                                         &adev->dma_desc_pool,
-                                                         GFP_KERNEL);
+       adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
+                                               plat_data->pool_size,
+                                               &adev->dma_desc_pool,
+                                               GFP_KERNEL);
        if (!adev->dma_desc_pool_virt) {
                ret = -ENOMEM;
                goto err_free_adev;
index 14091f878f80a5ca7cd0843e70284d890ad49f0c..3922a5d5680604f831308dc02a50b93685a089ce 100644 (file)
@@ -964,8 +964,8 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
         * requires that we explicitly flush the writes
         */
        mv_chan->dma_desc_pool_virt =
-         dma_alloc_writecombine(&pdev->dev, MV_XOR_POOL_SIZE,
-                                &mv_chan->dma_desc_pool, GFP_KERNEL);
+         dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
+                      GFP_KERNEL);
        if (!mv_chan->dma_desc_pool_virt)
                return ERR_PTR(-ENOMEM);
 
index 5a250cdc83769f0d4aaff65530c76b705a474e25..d34aef7a101b669994e2a92b88e448437f0bb6f8 100644 (file)
@@ -502,8 +502,8 @@ static int bam_alloc_chan(struct dma_chan *chan)
                return 0;
 
        /* allocate FIFO descriptor space, but only if necessary */
-       bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-                               &bchan->fifo_phys, GFP_KERNEL);
+       bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+                                       &bchan->fifo_phys, GFP_KERNEL);
 
        if (!bchan->fifo_virt) {
                dev_err(bdev->dev, "Failed to allocate desc fifo\n");
@@ -538,8 +538,8 @@ static void bam_free_chan(struct dma_chan *chan)
        bam_reset_channel(bchan);
        spin_unlock_irqrestore(&bchan->vc.lock, flags);
 
-       dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
-                               bchan->fifo_phys);
+       dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+                   bchan->fifo_phys);
        bchan->fifo_virt = NULL;
 
        /* mask irq for pipe/channel */
@@ -1231,9 +1231,9 @@ static int bam_dma_remove(struct platform_device *pdev)
                bam_dma_terminate_all(&bdev->channels[i].vc.chan);
                tasklet_kill(&bdev->channels[i].vc.task);
 
-               dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
-                       bdev->channels[i].fifo_virt,
-                       bdev->channels[i].fifo_phys);
+               dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+                           bdev->channels[i].fifo_virt,
+                           bdev->channels[i].fifo_phys);
        }
 
        tasklet_kill(&bdev->task);
index 34e54a313bf46b93eaae5abc1dde48cf21889ae9..93f0d4120289fa92d01a6873fac6d2efd37337e7 100644 (file)
@@ -1574,7 +1574,7 @@ static int knl_get_dimm_capacity(struct sbridge_pvt *pvt, u64 *mc_sizes)
                                for (cha = 0; cha < KNL_MAX_CHAS; cha++) {
                                        if (knl_get_mc_route(target,
                                                mc_route_reg[cha]) == channel
-                                               && participants[channel]) {
+                                               && !participants[channel]) {
                                                participant_count++;
                                                participants[channel] = 1;
                                                break;
index 8297bc319369d6e4e4dd1579195ca1b6161a57d1..1846d65b72859284b31c536dc2fdc1a29cae0d9e 100644 (file)
@@ -96,7 +96,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
         * In practice this won't execute very often unless on very fast
         * machines because the time window for this to happen is very small.
         */
-       while (amdgpuCrtc->enabled && repcnt--) {
+       while (amdgpuCrtc->enabled && --repcnt) {
                /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
                 * start in hpos, and to the "fudged earlier" vblank start in
                 * vpos.
@@ -112,13 +112,13 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
                        break;
 
                /* Sleep at least until estimated real start of hw vblank */
-               spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
                if (min_udelay > vblank->framedur_ns / 2000) {
                        /* Don't wait ridiculously long - something is wrong */
                        repcnt = 0;
                        break;
                }
+               spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                usleep_range(min_udelay, 2 * min_udelay);
                spin_lock_irqsave(&crtc->dev->event_lock, flags);
        };
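The change from repcnt-- to --repcnt above is more than cosmetic: as a while-condition, the post-decrement form decrements even on the final, failing test, so an unsigned counter ends up wrapped past zero if it is inspected after the loop, and the body also runs one extra iteration. A quick stand-alone illustration:

/* decrement_demo.c - post- vs pre-decrement as a while-loop condition */
#include <stdio.h>

int main(void)
{
        unsigned int post = 3, pre = 3;

        while (post--)  /* decrements even on the final, failing test */
                ;
        while (--pre)   /* stops with the counter at exactly 0        */
                ;

        /* post has wrapped past zero; pre is a clean 0 */
        printf("post=%u pre=%u\n", post, pre);  /* post=4294967295 pre=0 */
        return 0;
}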
index 21aacc1f45c1fd7afded7f6088c8a365e9005224..bf731e9f643e9ecddd367034179fae6a40d294ce 100644 (file)
@@ -265,15 +265,27 @@ static int amdgpu_atombios_dp_get_dp_link_config(struct drm_connector *connector
        unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
        unsigned lane_num, i, max_pix_clock;
 
-       for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-               for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-                       max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+       if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+           ENCODER_OBJECT_ID_NUTMEG) {
+               for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+                       max_pix_clock = (lane_num * 270000 * 8) / bpp;
                        if (max_pix_clock >= pix_clock) {
                                *dp_lanes = lane_num;
-                               *dp_rate = link_rates[i];
+                               *dp_rate = 270000;
                                return 0;
                        }
                }
+       } else {
+               for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+                       for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+                               max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+                               if (max_pix_clock >= pix_clock) {
+                                       *dp_lanes = lane_num;
+                                       *dp_rate = link_rates[i];
+                                       return 0;
+                               }
+                       }
+               }
        }
 
        return -EINVAL;
index e5df53b6e229ff0348541eeaa921acc6cf4700ef..1f500a1b99695a6bb83b3d596b9a2101f6f1f8ef 100644 (file)
@@ -109,8 +109,8 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
        if (IS_ERR(cma_obj))
                return cma_obj;
 
-       cma_obj->vaddr = dma_alloc_writecombine(drm->dev, size,
-                       &cma_obj->paddr, GFP_KERNEL | __GFP_NOWARN);
+       cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
+                                     GFP_KERNEL | __GFP_NOWARN);
        if (!cma_obj->vaddr) {
                dev_err(drm->dev, "failed to allocate buffer with size %zu\n",
                        size);
@@ -192,8 +192,8 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
        cma_obj = to_drm_gem_cma_obj(gem_obj);
 
        if (cma_obj->vaddr) {
-               dma_free_writecombine(gem_obj->dev->dev, cma_obj->base.size,
-                                     cma_obj->vaddr, cma_obj->paddr);
+               dma_free_wc(gem_obj->dev->dev, cma_obj->base.size,
+                           cma_obj->vaddr, cma_obj->paddr);
        } else if (gem_obj->import_attach) {
                drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
        }
@@ -324,9 +324,8 @@ static int drm_gem_cma_mmap_obj(struct drm_gem_cma_object *cma_obj,
        vma->vm_flags &= ~VM_PFNMAP;
        vma->vm_pgoff = 0;
 
-       ret = dma_mmap_writecombine(cma_obj->base.dev->dev, vma,
-                                   cma_obj->vaddr, cma_obj->paddr,
-                                   vma->vm_end - vma->vm_start);
+       ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
+                         cma_obj->paddr, vma->vm_end - vma->vm_start);
        if (ret)
                drm_gem_vm_close(vma);
 
index a33162cf4f4cc6c4f0aee66433680a15f64cfffc..3c1ce44483d991416fed77f8945dc6686f3a3341 100644 (file)
@@ -1113,8 +1113,8 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
        if (!cmdbuf)
                return NULL;
 
-       cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr,
-                                              GFP_KERNEL);
+       cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
+                                    GFP_KERNEL);
        if (!cmdbuf->vaddr) {
                kfree(cmdbuf);
                return NULL;
@@ -1128,8 +1128,8 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
 
 void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 {
-       dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size,
-                             cmdbuf->vaddr, cmdbuf->paddr);
+       dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
+                   cmdbuf->paddr);
        kfree(cmdbuf);
 }
 
index 34e38749a8176f87001e54cb4ea8a7cc86fa744e..f8ee740c0e264d488b937503745ac9de06bbc9b1 100644 (file)
@@ -1382,8 +1382,16 @@ static void tda998x_connector_destroy(struct drm_connector *connector)
        drm_connector_cleanup(connector);
 }
 
+static int tda998x_connector_dpms(struct drm_connector *connector, int mode)
+{
+       if (drm_core_check_feature(connector->dev, DRIVER_ATOMIC))
+               return drm_atomic_helper_connector_dpms(connector, mode);
+       else
+               return drm_helper_connector_dpms(connector, mode);
+}
+
 static const struct drm_connector_funcs tda998x_connector_funcs = {
-       .dpms = drm_atomic_helper_connector_dpms,
+       .dpms = tda998x_connector_dpms,
        .reset = drm_atomic_helper_connector_reset,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .detect = tda998x_connector_detect,
index 31f6d212fb1bbbfa1ce64efe8cebde2a5f9f575d..30f921421b0c944217832ba86856a6904f8fef11 100644 (file)
@@ -527,6 +527,8 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder)
 
        mutex_lock(&dev_priv->av_mutex);
        intel_dig_port->audio_connector = connector;
+       /* referred to in audio callbacks */
+       dev_priv->dig_port_map[port] = intel_encoder;
        mutex_unlock(&dev_priv->av_mutex);
 
        if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
@@ -554,6 +556,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
 
        mutex_lock(&dev_priv->av_mutex);
        intel_dig_port->audio_connector = NULL;
+       dev_priv->dig_port_map[port] = NULL;
        mutex_unlock(&dev_priv->av_mutex);
 
        if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
index 0f3df2c39f7cdd3a69d8f0044cbd725e3b9bac6d..084d5586585d012891423067703fde0f9daec0b1 100644 (file)
@@ -3358,7 +3358,6 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
        intel_encoder->get_config = intel_ddi_get_config;
 
        intel_dig_port->port = port;
-       dev_priv->dig_port_map[port] = intel_encoder;
        intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
                                          (DDI_BUF_PORT_REVERSAL |
                                           DDI_A_4_LANES);
index 1d8de43bed56839fd1fee3a75b6d61b6221c7c0a..cdc2c15873dcc71189ed3263aad55e3c63493ed3 100644 (file)
@@ -6045,7 +6045,6 @@ intel_dp_init(struct drm_device *dev,
        }
 
        intel_dig_port->port = port;
-       dev_priv->dig_port_map[port] = intel_encoder;
        intel_dig_port->dp.output_reg = output_reg;
 
        intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
index cb5d1b15755c3b19a496105c7a8df99982e83cb9..616108c4bc3e5741f59c9a1066aa2df3b63d5fc5 100644 (file)
@@ -2154,7 +2154,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 void intel_hdmi_init(struct drm_device *dev,
                     i915_reg_t hdmi_reg, enum port port)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_digital_port *intel_dig_port;
        struct intel_encoder *intel_encoder;
        struct intel_connector *intel_connector;
@@ -2223,7 +2222,6 @@ void intel_hdmi_init(struct drm_device *dev,
                intel_encoder->cloneable |= 1 << INTEL_OUTPUT_HDMI;
 
        intel_dig_port->port = port;
-       dev_priv->dig_port_map[port] = intel_encoder;
        intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
        intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 
index deb8282c26d83f952473ae145c4fef0b3112b9f1..52fbe530fc9eac207093271a05d44dbf3515e11c 100644 (file)
@@ -664,6 +664,12 @@ int intel_setup_gmbus(struct drm_device *dev)
 
                bus->adapter.algo = &gmbus_algorithm;
 
+               /*
+                * We wish to retry with bit banging
+                * after a timed out GMBUS attempt.
+                */
+               bus->adapter.retries = 1;
+
                /* By default use a conservative clock rate */
                bus->reg0 = pin | GMBUS_RATE_100KHZ;
 
index 30a57185bdb4e41e7ff26a0d4af36577a218b0ea..287226311413c7036cecb02e973e6109d0b26548 100644 (file)
@@ -64,6 +64,7 @@ static void ipu_fb_enable(struct ipu_crtc *ipu_crtc)
        /* Start DC channel and DI after IDMAC */
        ipu_dc_enable_channel(ipu_crtc->dc);
        ipu_di_enable(ipu_crtc->di);
+       drm_crtc_vblank_on(&ipu_crtc->base);
 
        ipu_crtc->enabled = 1;
 }
@@ -80,6 +81,7 @@ static void ipu_fb_disable(struct ipu_crtc *ipu_crtc)
        ipu_di_disable(ipu_crtc->di);
        ipu_plane_disable(ipu_crtc->plane[0]);
        ipu_dc_disable(ipu);
+       drm_crtc_vblank_off(&ipu_crtc->base);
 
        ipu_crtc->enabled = 0;
 }
index 591ba2f1ae03674224b4719660d547ad4e64c0b1..26bb1b626fe3d817812e18e19e0bfb64a4247c3c 100644 (file)
@@ -42,6 +42,7 @@ static const uint32_t ipu_plane_formats[] = {
        DRM_FORMAT_YVYU,
        DRM_FORMAT_YUV420,
        DRM_FORMAT_YVU420,
+       DRM_FORMAT_RGB565,
 };
 
 int ipu_plane_irq(struct ipu_plane *ipu_plane)
index dfebdc4aa0f24ac00f47ab3062d1e24e9b455648..85dfe3674b4138ef02e1b3f043699459d9decb25 100644 (file)
@@ -573,10 +573,9 @@ static int omap_dmm_remove(struct platform_device *dev)
 
                kfree(omap_dmm->engines);
                if (omap_dmm->refill_va)
-                       dma_free_writecombine(omap_dmm->dev,
-                               REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-                               omap_dmm->refill_va,
-                               omap_dmm->refill_pa);
+                       dma_free_wc(omap_dmm->dev,
+                                   REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+                                   omap_dmm->refill_va, omap_dmm->refill_pa);
                if (omap_dmm->dummy_page)
                        __free_page(omap_dmm->dummy_page);
 
@@ -701,9 +700,9 @@ static int omap_dmm_probe(struct platform_device *dev)
        omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
 
        /* alloc refill memory */
-       omap_dmm->refill_va = dma_alloc_writecombine(&dev->dev,
-                               REFILL_BUFFER_SIZE * omap_dmm->num_engines,
-                               &omap_dmm->refill_pa, GFP_KERNEL);
+       omap_dmm->refill_va = dma_alloc_wc(&dev->dev,
+                                          REFILL_BUFFER_SIZE * omap_dmm->num_engines,
+                                          &omap_dmm->refill_pa, GFP_KERNEL);
        if (!omap_dmm->refill_va) {
                dev_err(&dev->dev, "could not allocate refill memory\n");
                goto fail;
index 8495a1a4b61745524dd04045616d859b2ac566bb..359b0d7e8ef78faf6cd9d6e044cfb5088f99f0be 100644 (file)
@@ -1330,8 +1330,8 @@ void omap_gem_free_object(struct drm_gem_object *obj)
                        omap_gem_detach_pages(obj);
 
                if (!is_shmem(obj)) {
-                       dma_free_writecombine(dev->dev, obj->size,
-                                       omap_obj->vaddr, omap_obj->paddr);
+                       dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
+                                   omap_obj->paddr);
                } else if (omap_obj->vaddr) {
                        vunmap(omap_obj->vaddr);
                }
@@ -1395,8 +1395,8 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev,
                /* attempt to allocate contiguous memory if we don't
                 * have DMM for remapping discontiguous buffers
                 */
-               omap_obj->vaddr =  dma_alloc_writecombine(dev->dev, size,
-                               &omap_obj->paddr, GFP_KERNEL);
+               omap_obj->vaddr =  dma_alloc_wc(dev->dev, size,
+                                               &omap_obj->paddr, GFP_KERNEL);
                if (!omap_obj->vaddr) {
                        kfree(omap_obj);
 
index 44ee72e04df9e953bafe64cdfdadf2f01c1f9bce..6af832545bc5b76b44e836553e72005f01012bbe 100644 (file)
@@ -315,15 +315,27 @@ int radeon_dp_get_dp_link_config(struct drm_connector *connector,
        unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
        unsigned lane_num, i, max_pix_clock;
 
-       for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
-               for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
-                       max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+       if (radeon_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+           ENCODER_OBJECT_ID_NUTMEG) {
+               for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+                       max_pix_clock = (lane_num * 270000 * 8) / bpp;
                        if (max_pix_clock >= pix_clock) {
                                *dp_lanes = lane_num;
-                               *dp_rate = link_rates[i];
+                               *dp_rate = 270000;
                                return 0;
                        }
                }
+       } else {
+               for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+                       for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+                               max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+                               if (max_pix_clock >= pix_clock) {
+                                       *dp_lanes = lane_num;
+                                       *dp_rate = link_rates[i];
+                                       return 0;
+                               }
+                       }
+               }
        }
 
        return -EINVAL;
index 2b9ba03a7c1a84bc00564fe0f6391771977c2e7b..2d9196a447fdc94a49140dcad365b6d3957d43f6 100644 (file)
@@ -455,7 +455,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
         * In practice this won't execute very often unless on very fast
         * machines because the time window for this to happen is very small.
         */
-       while (radeon_crtc->enabled && repcnt--) {
+       while (radeon_crtc->enabled && --repcnt) {
                /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
                 * start in hpos, and to the "fudged earlier" vblank start in
                 * vpos.
@@ -471,13 +471,13 @@ static void radeon_flip_work_func(struct work_struct *__work)
                        break;
 
                /* Sleep at least until estimated real start of hw vblank */
-               spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
                if (min_udelay > vblank->framedur_ns / 2000) {
                        /* Don't wait ridiculously long - something is wrong */
                        repcnt = 0;
                        break;
                }
+               spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
                usleep_range(min_udelay, 2 * min_udelay);
                spin_lock_irqsave(&crtc->dev->event_lock, flags);
        };
index 0f14d897baf9b32974a97eaa59153363e3d8c97b..7a98823bacd1cd5da33c9e11dc7f798ae1a866d9 100644 (file)
@@ -1079,6 +1079,8 @@ force:
 
        /* update display watermarks based on new power state */
        radeon_bandwidth_update(rdev);
+       /* update displays */
+       radeon_dpm_display_configuration_changed(rdev);
 
        /* wait for the rings to drain */
        for (i = 0; i < RADEON_NUM_RINGS; i++) {
@@ -1095,9 +1097,6 @@ force:
 
        radeon_dpm_post_set_power_state(rdev);
 
-       /* update displays */
-       radeon_dpm_display_configuration_changed(rdev);
-
        rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
        rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
        rdev->pm.dpm.single_display = single_display;
index 807863106b8da813949383f291830c60d087d4e0..bd736ace3f8147b25c3362cb2e46aeeef2ef87b8 100644 (file)
@@ -157,17 +157,15 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
                cursor->height = src_h;
 
                if (cursor->pixmap.base)
-                       dma_free_writecombine(cursor->dev,
-                                             cursor->pixmap.size,
-                                             cursor->pixmap.base,
-                                             cursor->pixmap.paddr);
+                       dma_free_wc(cursor->dev, cursor->pixmap.size,
+                                   cursor->pixmap.base, cursor->pixmap.paddr);
 
                cursor->pixmap.size = cursor->width * cursor->height;
 
-               cursor->pixmap.base = dma_alloc_writecombine(cursor->dev,
-                                                       cursor->pixmap.size,
-                                                       &cursor->pixmap.paddr,
-                                                       GFP_KERNEL | GFP_DMA);
+               cursor->pixmap.base = dma_alloc_wc(cursor->dev,
+                                                  cursor->pixmap.size,
+                                                  &cursor->pixmap.paddr,
+                                                  GFP_KERNEL | GFP_DMA);
                if (!cursor->pixmap.base) {
                        DRM_ERROR("Failed to allocate memory for pixmap\n");
                        return;
@@ -252,8 +250,8 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
 
        /* Allocate clut buffer */
        size = 0x100 * sizeof(unsigned short);
-       cursor->clut = dma_alloc_writecombine(dev, size, &cursor->clut_paddr,
-                                             GFP_KERNEL | GFP_DMA);
+       cursor->clut = dma_alloc_wc(dev, size, &cursor->clut_paddr,
+                                   GFP_KERNEL | GFP_DMA);
 
        if (!cursor->clut) {
                DRM_ERROR("Failed to allocate memory for cursor clut\n");
@@ -286,7 +284,7 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
        return &cursor->plane.drm_plane;
 
 err_plane:
-       dma_free_writecombine(dev, size, cursor->clut, cursor->clut_paddr);
+       dma_free_wc(dev, size, cursor->clut, cursor->clut_paddr);
 err_clut:
        devm_kfree(dev, cursor);
        return NULL;
index f9a1d92c9d9519c3ddbeefe33277f26af4f37a32..514551c857bbda87b8fcbbca215bdc17c7bdfe67 100644 (file)
@@ -312,8 +312,7 @@ static void sti_gdp_init(struct sti_gdp *gdp)
        /* Allocate all the nodes within a single memory page */
        size = sizeof(struct sti_gdp_node) *
            GDP_NODE_PER_FIELD * GDP_NODE_NB_BANK;
-       base = dma_alloc_writecombine(gdp->dev,
-                                     size, &dma_addr, GFP_KERNEL | GFP_DMA);
+       base = dma_alloc_wc(gdp->dev, size, &dma_addr, GFP_KERNEL | GFP_DMA);
 
        if (!base) {
                DRM_ERROR("Failed to allocate memory for GDP node\n");
index 43861b52261d7e4fee17839351577e5d4b90080f..1d3c3d029603d030ec02809fd14026d6d5df08da 100644 (file)
@@ -617,9 +617,9 @@ static void sti_hqvdp_init(struct sti_hqvdp *hqvdp)
 
        /* Allocate memory for the VDP commands */
        size = NB_VDP_CMD * sizeof(struct sti_hqvdp_cmd);
-       hqvdp->hqvdp_cmd = dma_alloc_writecombine(hqvdp->dev, size,
-                                        &hqvdp->hqvdp_cmd_paddr,
-                                        GFP_KERNEL | GFP_DMA);
+       hqvdp->hqvdp_cmd = dma_alloc_wc(hqvdp->dev, size,
+                                       &hqvdp->hqvdp_cmd_paddr,
+                                       GFP_KERNEL | GFP_DMA);
        if (!hqvdp->hqvdp_cmd) {
                DRM_ERROR("Failed to allocate memory for VDP cmd\n");
                return;
index 33add93b4ed96826a3f7da9bfc5d11d0018eff8b..3b0d8c392b707e93e56a0b391345d58d3a5eef4c 100644 (file)
@@ -175,8 +175,7 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo)
                sg_free_table(bo->sgt);
                kfree(bo->sgt);
        } else if (bo->vaddr) {
-               dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr,
-                                     bo->paddr);
+               dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
        }
 }
 
@@ -233,8 +232,8 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo)
        } else {
                size_t size = bo->gem.size;
 
-               bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-                                                  GFP_KERNEL | __GFP_NOWARN);
+               bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+                                        GFP_KERNEL | __GFP_NOWARN);
                if (!bo->vaddr) {
                        dev_err(drm->dev,
                                "failed to allocate buffer of size %zu\n",
@@ -472,8 +471,8 @@ int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma)
                vma->vm_flags &= ~VM_PFNMAP;
                vma->vm_pgoff = 0;
 
-               ret = dma_mmap_writecombine(gem->dev->dev, vma, bo->vaddr,
-                                           bo->paddr, gem->size);
+               ret = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+                                 gem->size);
                if (ret) {
                        drm_gem_vm_close(vma);
                        return ret;
index 22278bcfc60eac4ed40fac0e31dda840aa4b703e..034ef2de903769f521c50c6a6936a5099e69578f 100644 (file)
@@ -398,9 +398,8 @@ int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
        vma->vm_flags &= ~VM_PFNMAP;
        vma->vm_pgoff = 0;
 
-       ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
-                                   bo->base.vaddr, bo->base.paddr,
-                                   vma->vm_end - vma->vm_start);
+       ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
+                         bo->base.paddr, vma->vm_end - vma->vm_start);
        if (ret)
                drm_gem_vm_close(vma);
 
index db082bea8dafd023fa6ad19719f7fadf5ebfcad5..c5a1a08b0449004670b79947e68f0ba7fb8afb50 100644 (file)
@@ -563,6 +563,8 @@ static void vmw_sou_connector_destroy(struct drm_connector *connector)
 
 static const struct drm_connector_funcs vmw_sou_connector_funcs = {
        .dpms = vmw_du_connector_dpms,
+       .detect = vmw_du_connector_detect,
+       .fill_modes = vmw_du_connector_fill_modes,
        .set_property = vmw_du_connector_set_property,
        .destroy = vmw_sou_connector_destroy,
 };
index 5a8c8d55317ab4f622c5754f509d5d3660a3e47c..a18db4d5347cefa99847df944be3a3b364de089a 100644 (file)
@@ -52,8 +52,8 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
        struct host1x *host1x = cdma_to_host1x(cdma);
 
        if (pb->phys != 0)
-               dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
-                                     pb->mapped, pb->phys);
+               dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped,
+                           pb->phys);
 
        pb->mapped = NULL;
        pb->phys = 0;
@@ -76,8 +76,8 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
        pb->pos = 0;
 
        /* allocate and map pushbuffer memory */
-       pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
-                                           &pb->phys, GFP_KERNEL);
+       pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys,
+                                 GFP_KERNEL);
        if (!pb->mapped)
                goto fail;
 
index 63bd63f3c7dfd2da2fd3d9ae58bba1473bceb70b..defa7995f2131a06ddc88e09d13d914f8a244ea1 100644 (file)
@@ -467,9 +467,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
                size += g->words * sizeof(u32);
        }
 
-       job->gather_copy_mapped = dma_alloc_writecombine(dev, size,
-                                                        &job->gather_copy,
-                                                        GFP_KERNEL);
+       job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+                                              GFP_KERNEL);
        if (!job->gather_copy_mapped) {
                job->gather_copy_mapped = NULL;
                return -ENOMEM;
@@ -578,9 +577,8 @@ void host1x_job_unpin(struct host1x_job *job)
        job->num_unpins = 0;
 
        if (job->gather_copy_size)
-               dma_free_writecombine(job->channel->dev, job->gather_copy_size,
-                                     job->gather_copy_mapped,
-                                     job->gather_copy);
+               dma_free_wc(job->channel->dev, job->gather_copy_size,
+                           job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 
index f2e13eb8339ffc1287110e247e0e54e922b7d72a..e00db3f510dd425c62565d913c937c5638a072d5 100644 (file)
@@ -1050,6 +1050,17 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
        for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
                const struct ipu_platform_reg *reg = &client_reg[i];
                struct platform_device *pdev;
+               struct device_node *of_node;
+
+               /* Associate subdevice with the corresponding port node */
+               of_node = of_graph_get_port_by_id(dev->of_node, i);
+               if (!of_node) {
+                       dev_info(dev,
+                                "no port@%d node in %s, not using %s%d\n",
+                                i, dev->of_node->full_name,
+                                (i / 2) ? "DI" : "CSI", i % 2);
+                       continue;
+               }
 
                pdev = platform_device_alloc(reg->name, id++);
                if (!pdev) {
@@ -1057,17 +1068,9 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
                        goto err_register;
                }
 
+               pdev->dev.of_node = of_node;
                pdev->dev.parent = dev;
 
-               /* Associate subdevice with the corresponding port node */
-               pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i);
-               if (!pdev->dev.of_node) {
-                       dev_err(dev, "missing port@%d node in %s\n", i,
-                               dev->of_node->full_name);
-                       ret = -ENODEV;
-                       goto err_register;
-               }
-
                ret = platform_device_add_data(pdev, &reg->pdata,
                                               sizeof(reg->pdata));
                if (!ret)
@@ -1289,10 +1292,6 @@ static int ipu_probe(struct platform_device *pdev)
        ipu->irq_sync = irq_sync;
        ipu->irq_err = irq_err;
 
-       ret = ipu_irq_init(ipu);
-       if (ret)
-               goto out_failed_irq;
-
        ret = device_reset(&pdev->dev);
        if (ret) {
                dev_err(&pdev->dev, "failed to reset: %d\n", ret);
@@ -1302,6 +1301,10 @@ static int ipu_probe(struct platform_device *pdev)
        if (ret)
                goto out_failed_reset;
 
+       ret = ipu_irq_init(ipu);
+       if (ret)
+               goto out_failed_irq;
+
        /* Set MCU_T to divide MCU access window into 2 */
        ipu_cm_write(ipu, 0x00400000L | (IPU_MCU_T_DEFAULT << 18),
                        IPU_DISP_GEN);
@@ -1324,9 +1327,9 @@ static int ipu_probe(struct platform_device *pdev)
 failed_add_clients:
        ipu_submodules_exit(ipu);
 failed_submodules_init:
-out_failed_reset:
        ipu_irq_exit(ipu);
 out_failed_irq:
+out_failed_reset:
        clk_disable_unprepare(ipu->clk);
        return ret;
 }
index 7dae0ac0f3aec12c5d44875111d3a69818e43c92..e9219f528d7e8ada4876649c28b9d0c54ce68117 100644 (file)
@@ -20,6 +20,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+/* We need to access legacy defines from linux/media.h */
+#define __NEED_MEDIA_LEGACY_API
+
 #include <linux/compat.h>
 #include <linux/export.h>
 #include <linux/idr.h>
@@ -115,6 +118,26 @@ static long media_device_enum_entities(struct media_device *mdev,
        u_ent.group_id = 0;             /* Unused */
        u_ent.pads = ent->num_pads;
        u_ent.links = ent->num_links - ent->num_backlinks;
+
+       /*
+        * Workaround for a bug in media-ctl <= v1.10 that makes it do the
+        * wrong thing if the entity function does not belong to either the
+        * MEDIA_ENT_F_OLD_BASE or the MEDIA_ENT_F_OLD_SUBDEV_BASE range.
+        *
+        * Non-subdevices are expected to be in the MEDIA_ENT_F_OLD_BASE
+        * range; otherwise they are silently ignored by media-ctl when
+        * printing the graphviz diagram. So, map them into the old devnode
+        * range.
+        */
+       if (ent->function < MEDIA_ENT_F_OLD_BASE ||
+           ent->function > MEDIA_ENT_T_DEVNODE_UNKNOWN) {
+               if (is_media_entity_v4l2_subdev(ent))
+                       u_ent.type = MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN;
+               else if (ent->function != MEDIA_ENT_F_IO_V4L)
+                       u_ent.type = MEDIA_ENT_T_DEVNODE_UNKNOWN;
+       }
+
        memcpy(&u_ent.raw, &ent->info, sizeof(ent->info));
        if (copy_to_user(uent, &u_ent, sizeof(u_ent)))
                return -EFAULT;
index 7d28899f89ce16c00cd9f00f424a948fa57e2ae8..38aacc7fc692024b58afd94708218b022bafd318 100644 (file)
@@ -1455,9 +1455,9 @@ static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
                return 0;
 
        ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
-       ctx->bitstream.vaddr = dma_alloc_writecombine(
-                       &ctx->dev->plat_dev->dev, ctx->bitstream.size,
-                       &ctx->bitstream.paddr, GFP_KERNEL);
+       ctx->bitstream.vaddr = dma_alloc_wc(&ctx->dev->plat_dev->dev,
+                                           ctx->bitstream.size,
+                                           &ctx->bitstream.paddr, GFP_KERNEL);
        if (!ctx->bitstream.vaddr) {
                v4l2_err(&ctx->dev->v4l2_dev,
                         "failed to allocate bitstream ringbuffer");
@@ -1474,8 +1474,8 @@ static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
        if (ctx->bitstream.vaddr == NULL)
                return;
 
-       dma_free_writecombine(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
-                             ctx->bitstream.vaddr, ctx->bitstream.paddr);
+       dma_free_wc(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
+                   ctx->bitstream.vaddr, ctx->bitstream.paddr);
        ctx->bitstream.vaddr = NULL;
        kfifo_init(&ctx->bitstream_fifo, NULL, 0);
 }
index 11fdadc68e53e57722b4d58892fdf2c644beba34..2a6eaf1122b4e9b742eb3777fb4b6b317c07b201 100644 (file)
@@ -103,6 +103,7 @@ enum ctype {
        CT_EXEC_USERSPACE,
        CT_ACCESS_USERSPACE,
        CT_WRITE_RO,
+       CT_WRITE_RO_AFTER_INIT,
        CT_WRITE_KERN,
 };
 
@@ -140,6 +141,7 @@ static char* cp_type[] = {
        "EXEC_USERSPACE",
        "ACCESS_USERSPACE",
        "WRITE_RO",
+       "WRITE_RO_AFTER_INIT",
        "WRITE_KERN",
 };
 
@@ -162,6 +164,7 @@ static DEFINE_SPINLOCK(lock_me_up);
 static u8 data_area[EXEC_SIZE];
 
 static const unsigned long rodata = 0xAA55AA55;
+static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
 
 module_param(recur_count, int, 0644);
 MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
@@ -503,11 +506,28 @@ static void lkdtm_do_action(enum ctype which)
                break;
        }
        case CT_WRITE_RO: {
-               unsigned long *ptr;
+               /* Explicitly cast away "const" for the test. */
+               unsigned long *ptr = (unsigned long *)&rodata;
 
-               ptr = (unsigned long *)&rodata;
+               pr_info("attempting bad rodata write at %p\n", ptr);
+               *ptr ^= 0xabcd1234;
 
-               pr_info("attempting bad write at %p\n", ptr);
+               break;
+       }
+       case CT_WRITE_RO_AFTER_INIT: {
+               unsigned long *ptr = &ro_after_init;
+
+               /*
+                * Verify that this variable was written to during init. Since
+                * an Oops is considered a "success", a failure here just skips
+                * the real test.
+                */
+               if ((*ptr & 0xAA) != 0xAA) {
+                       pr_info("%p was NOT written during init!?\n", ptr);
+                       break;
+               }
+
+               pr_info("attempting bad ro_after_init write at %p\n", ptr);
                *ptr ^= 0xabcd1234;
 
                break;
@@ -817,6 +837,9 @@ static int __init lkdtm_module_init(void)
        int n_debugfs_entries = 1; /* Assume only the direct entry */
        int i;
 
+       /* Make sure we can write to __ro_after_init values during __init */
+       ro_after_init |= 0xAA;
+
        /* Register debugfs interface */
        lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
        if (!lkdtm_debugfs_root) {
index 79316159eec63cbca36aa06d27bfaab561b62dc0..88b6c81cebbe68123da96775b3a42c3f96da4a11 100644 (file)
@@ -187,7 +187,7 @@ static int double_bit_error_detect(void *error_data, void *error_ecc,
        __nand_calculate_ecc(error_data, size, calc_ecc);
        ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
 
-       return (ret == -1) ? 0 : -EINVAL;
+       return (ret == -EBADMSG) ? 0 : -EINVAL;
 }
 
 static const struct nand_ecc_test nand_ecc_test[] = {
index b0045a505dc88c9ef56dcc1094a198bd011d24a7..95825b38559addb8a0dc5cca22bc305e228e65f6 100644 (file)
@@ -55,7 +55,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
        for (p = iomem_resource.child; p ; p = p->sibling) {
                struct nd_region_desc ndr_desc;
 
-               if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0)
+               if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY)
                        continue;
 
                memset(&ndr_desc, 0, sizeof(ndr_desc));
index a656d9e8334302c2ec4f013ad6c016034ace5f27..21905fef2cbf0d34259e37069e50140a66101441 100644 (file)
@@ -91,7 +91,7 @@ static int configure_memory(const unsigned char *buf,
        for (i=0;i<HPEE_MEMORY_MAX_ENT;i++) {
                c = get_8(buf+len);
                
-               if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+               if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
                        int result;
                        
                        res->name = name;
@@ -183,7 +183,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent,
        for (i=0;i<HPEE_PORT_MAX_ENT;i++) {
                c = get_8(buf+len);
                
-               if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) {
+               if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) {
                        res->name = board;
                        res->start = get_16(buf+len+1);
                        res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;
index 602eb422351060c611967538359b8409885397a1..f89db3af0607263648c9a64432bcb1efdaeb8326 100644 (file)
@@ -4772,8 +4772,10 @@ int pci_get_new_domain_nr(void)
 void pci_bus_assign_domain_nr(struct pci_bus *bus, struct device *parent)
 {
        static int use_dt_domains = -1;
-       int domain = of_get_pci_domain_nr(parent->of_node);
+       int domain = -1;
 
+       if (parent)
+               domain = of_get_pci_domain_nr(parent->of_node);
        /*
         * Check DT domain and use_dt_domains values.
         *
index d7b87c64b7cd261c6ec6a1b22ddce040747679d9..e220edc85c68a12bda653b0177255cf38459c7d2 100644 (file)
@@ -117,7 +117,7 @@ int rio_request_inb_mbox(struct rio_mport *mport,
        if (mport->ops->open_inb_mbox == NULL)
                goto out;
 
-       res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+       res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
        if (res) {
                rio_init_mbox_res(res, mbox, mbox);
@@ -185,7 +185,7 @@ int rio_request_outb_mbox(struct rio_mport *mport,
        if (mport->ops->open_outb_mbox == NULL)
                goto out;
 
-       res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+       res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
        if (res) {
                rio_init_mbox_res(res, mbox, mbox);
@@ -285,7 +285,7 @@ int rio_request_inb_dbell(struct rio_mport *mport,
 {
        int rc = 0;
 
-       struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+       struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
        if (res) {
                rio_init_dbell_res(res, start, end);
@@ -360,7 +360,7 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end)
 struct resource *rio_request_outb_dbell(struct rio_dev *rdev, u16 start,
                                        u16 end)
 {
-       struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL);
+       struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 
        if (res) {
                rio_init_dbell_res(res, start, end);
index cb61f300f8b5d111ce7871a39be43a1b7fe3ca80..277b5c8c825ca4a63864a01ba67cbd8765b5a041 100644 (file)
@@ -67,7 +67,7 @@ static const u8 DASD_DIAG_CMS1[] = { 0xc3, 0xd4, 0xe2, 0xf1 };/* EBCDIC CMS1 */
  * and function code cmd.
  * In case of an exception return 3. Otherwise return result of bitwise OR of
  * resulting condition code and DIAG return code. */
-static inline int dia250(void *iob, int cmd)
+static inline int __dia250(void *iob, int cmd)
 {
        register unsigned long reg2 asm ("2") = (unsigned long) iob;
        typedef union {
@@ -77,7 +77,6 @@ static inline int dia250(void *iob, int cmd)
        int rc;
 
        rc = 3;
-       diag_stat_inc(DIAG_STAT_X250);
        asm volatile(
                "       diag    2,%2,0x250\n"
                "0:     ipm     %0\n"
@@ -91,6 +90,12 @@ static inline int dia250(void *iob, int cmd)
        return rc;
 }
 
+static inline int dia250(void *iob, int cmd)
+{
+       diag_stat_inc(DIAG_STAT_X250);
+       return __dia250(iob, cmd);
+}
+
 /* Initialize block I/O to DIAG device using the specified blocksize and
  * block offset. On success, return zero and set end_block to contain the
  * number of blocks on the device minus the specified offset. Return non-zero
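
The dasd_diag hunk above splits the DIAG 0x250 call so that the statistics bump lives in a small C wrapper while the inline-assembly body stays in __dia250(); callers keep using dia250() and the accounting can no longer be forgotten. The same thin-wrapper shape, sketched with hypothetical names (the counter stands in for diag_stat_inc()):

#include <linux/atomic.h>

static atomic_t lowlevel_op_count = ATOMIC_INIT(0);     /* stand-in stat counter */

/* Low-level body: in the driver above this is the inline-assembly DIAG call. */
static inline int __do_lowlevel_op(void *iob, int cmd)
{
        return 0;
}

/* Public entry point: account for the call, then do the real work. */
static inline int do_lowlevel_op(void *iob, int cmd)
{
        atomic_inc(&lowlevel_op_count);
        return __do_lowlevel_op(iob, cmd);
}
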
index 2d9e7f3d56115aeaddea8eb250008dba7729d98e..bb1fb771213401a8cbf67b98011f464409ecb699 100644 (file)
@@ -66,7 +66,7 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *sdev,
        superhyway_read_vcr(dev, base, &dev->vcr);
 
        if (!dev->resource) {
-               dev->resource = kmalloc(sizeof(struct resource), GFP_KERNEL);
+               dev->resource = kzalloc(sizeof(struct resource), GFP_KERNEL);
                if (!dev->resource) {
                        kfree(dev);
                        return -ENOMEM;
index 6a4ff27f4357eb229815c18b535327487f2df580..c688efa95e29b0561ac14be901cc4e4faa5aaf33 100644 (file)
@@ -204,8 +204,8 @@ static bool spi_imx_can_dma(struct spi_master *master, struct spi_device *spi,
 {
        struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 
-       if (spi_imx->dma_is_inited &&
-           transfer->len > spi_imx->wml * sizeof(u32))
+       if (spi_imx->dma_is_inited && transfer->len >= spi_imx->wml &&
+           (transfer->len % spi_imx->wml) == 0)
                return true;
        return false;
 }
@@ -919,8 +919,6 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
        struct dma_async_tx_descriptor *desc_tx = NULL, *desc_rx = NULL;
        int ret;
        unsigned long timeout;
-       u32 dma;
-       int left;
        struct spi_master *master = spi_imx->bitbang.master;
        struct sg_table *tx = &transfer->tx_sg, *rx = &transfer->rx_sg;
 
@@ -954,13 +952,6 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
        /* Trigger the cspi module. */
        spi_imx->dma_finished = 0;
 
-       dma = readl(spi_imx->base + MX51_ECSPI_DMA);
-       dma = dma & (~MX51_ECSPI_DMA_RXT_WML_MASK);
-       /* Change RX_DMA_LENGTH trigger dma fetch tail data */
-       left = transfer->len % spi_imx->wml;
-       if (left)
-               writel(dma | (left << MX51_ECSPI_DMA_RXT_WML_OFFSET),
-                               spi_imx->base + MX51_ECSPI_DMA);
        /*
         * Set these order to avoid potential RX overflow. The overflow may
         * happen if we enable SPI HW before starting RX DMA due to rescheduling
@@ -992,10 +983,6 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
                        spi_imx->devtype_data->reset(spi_imx);
                        dmaengine_terminate_all(master->dma_rx);
                }
-               dma &= ~MX51_ECSPI_DMA_RXT_WML_MASK;
-               writel(dma |
-                      spi_imx->wml << MX51_ECSPI_DMA_RXT_WML_OFFSET,
-                      spi_imx->base + MX51_ECSPI_DMA);
        }
 
        spi_imx->dma_finished = 1;
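
The spi-imx change above only hands a transfer to DMA when its length is at least one watermark level (wml) and an exact multiple of it; that is what lets the RX_DMA_LENGTH tail-fixup writes later in the hunk disappear, since no partial burst is ever left over. A hedged sketch of such a predicate (names hypothetical):

#include <linux/types.h>

/* Accept DMA only for lengths that are whole multiples of the watermark
 * level, so the tail of the transfer never needs a separate fixup. */
static bool xfer_fits_dma(bool dma_ready, unsigned int len, unsigned int wml)
{
        return dma_ready && len >= wml && (len % wml) == 0;
}
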
index 79a8bc4f6cec9e32ec9c78627a2e68fc85ab5e12..7cb1b2d710c10f7aab6c245a31d34e5439dfcbf0 100644 (file)
@@ -749,6 +749,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
        return 0;
 
 err_register_master:
+       pm_runtime_disable(&pdev->dev);
        if (rs->dma_tx.ch)
                dma_release_channel(rs->dma_tx.ch);
        if (rs->dma_rx.ch)
@@ -778,6 +779,8 @@ static int rockchip_spi_remove(struct platform_device *pdev)
        if (rs->dma_rx.ch)
                dma_release_channel(rs->dma_rx.ch);
 
+       spi_master_put(master);
+
        return 0;
 }
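
The spi-rockchip hunks above balance the driver's setup and teardown: the probe error path now undoes pm_runtime_enable(), and remove() drops the reference it takes with spi_master_get() by calling spi_master_put(). A condensed probe-unwind sketch under those assumptions (the real driver also releases its DMA channels on this path):

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>

static int foo_spi_probe(struct platform_device *pdev)
{
        struct spi_master *master;
        int ret;

        master = spi_alloc_master(&pdev->dev, 0);
        if (!master)
                return -ENOMEM;

        pm_runtime_enable(&pdev->dev);

        ret = devm_spi_register_master(&pdev->dev, master);
        if (ret < 0)
                goto err_register_master;

        return 0;

err_register_master:
        /* unwind in reverse order of the setup above */
        pm_runtime_disable(&pdev->dev);
        spi_master_put(master);
        return ret;
}
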
 
index 82a663ba98009fb5f286a88651c9b3a54cf68bbb..4f229e711e1c1cfc0134abb71b617f9511a6e606 100644 (file)
@@ -177,7 +177,6 @@ void core_tmr_abort_task(
 
                if (!__target_check_io_state(se_cmd, se_sess, 0)) {
                        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-                       target_put_sess_cmd(se_cmd);
                        goto out;
                }
                list_del_init(&se_cmd->se_cmd_list);
index a305caea58eea717fdffaac4c5364552673ad5ae..fb75b7e5a19ab52748e4a1980d56046e15716908 100644 (file)
@@ -1040,8 +1040,8 @@ static int acornfb_probe(struct platform_device *dev)
                 * for the framebuffer if we are not using
                 * VRAM.
                 */
-               base = dma_alloc_writecombine(current_par.dev, size, &handle,
-                                             GFP_KERNEL);
+               base = dma_alloc_wc(current_par.dev, size, &handle,
+                                   GFP_KERNEL);
                if (base == NULL) {
                        printk(KERN_ERR "acornfb: unable to allocate screen "
                               "memory\n");
index 7a8afcd4573e5c1e595550f25985ea1181367157..a8a22daa3f9d2a70a6fa1ca5f4e0f526e7380375 100644 (file)
@@ -154,8 +154,8 @@ int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize)
 {
        dma_addr_t dma;
 
-       fb->fb.screen_base = dma_alloc_writecombine(&fb->dev->dev, framesize,
-                                                   &dma, GFP_KERNEL);
+       fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, framesize, &dma,
+                                         GFP_KERNEL);
        if (!fb->fb.screen_base) {
                pr_err("CLCD: unable to map framebuffer\n");
                return -ENOMEM;
@@ -169,14 +169,12 @@ int versatile_clcd_setup_dma(struct clcd_fb *fb, unsigned long framesize)
 
 int versatile_clcd_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-       return dma_mmap_writecombine(&fb->dev->dev, vma,
-                                    fb->fb.screen_base,
-                                    fb->fb.fix.smem_start,
-                                    fb->fb.fix.smem_len);
+       return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+                          fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 void versatile_clcd_remove_dma(struct clcd_fb *fb)
 {
-       dma_free_writecombine(&fb->dev->dev, fb->fb.fix.smem_len,
-                             fb->fb.screen_base, fb->fb.fix.smem_start);
+       dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
+                   fb->fb.fix.smem_start);
 }
index 9362424c2340490585fe02e4dfe950f53f2097af..fe274b5851c77ff5e59b7c88f205cbf09ec61114 100644 (file)
@@ -774,8 +774,8 @@ static int clcdfb_of_dma_setup(struct clcd_fb *fb)
 
 static int clcdfb_of_dma_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
 {
-       return dma_mmap_writecombine(&fb->dev->dev, vma, fb->fb.screen_base,
-                       fb->fb.fix.smem_start, fb->fb.fix.smem_len);
+       return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
+                          fb->fb.fix.smem_start, fb->fb.fix.smem_len);
 }
 
 static void clcdfb_of_dma_remove(struct clcd_fb *fb)
index 19eb42b57d8742089d2faadf5c8a64ce81cc01b3..56c60e67316a57457c1741ea62d614e113d0a00e 100644 (file)
@@ -414,8 +414,8 @@ static inline void atmel_lcdfb_free_video_memory(struct atmel_lcdfb_info *sinfo)
 {
        struct fb_info *info = sinfo->info;
 
-       dma_free_writecombine(info->device, info->fix.smem_len,
-                               info->screen_base, info->fix.smem_start);
+       dma_free_wc(info->device, info->fix.smem_len, info->screen_base,
+                   info->fix.smem_start);
 }
 
 /**
@@ -435,8 +435,9 @@ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo)
                    * ((var->bits_per_pixel + 7) / 8));
        info->fix.smem_len = max(smem_len, sinfo->smem_len);
 
-       info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len,
-                                       (dma_addr_t *)&info->fix.smem_start, GFP_KERNEL);
+       info->screen_base = dma_alloc_wc(info->device, info->fix.smem_len,
+                                        (dma_addr_t *)&info->fix.smem_start,
+                                        GFP_KERNEL);
 
        if (!info->screen_base) {
                return -ENOMEM;
index 5b1081030cbbbefb3f7a8ff1f5f7dbed03ebb2fa..75f0db25d19fef774b4903f71b43f9beb5a578f2 100644 (file)
@@ -316,9 +316,8 @@ static int ep93xxfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
        unsigned int offset = vma->vm_pgoff << PAGE_SHIFT;
 
        if (offset < info->fix.smem_len) {
-               return dma_mmap_writecombine(info->dev, vma, info->screen_base,
-                                            info->fix.smem_start,
-                                            info->fix.smem_len);
+               return dma_mmap_wc(info->dev, vma, info->screen_base,
+                                  info->fix.smem_start, info->fix.smem_len);
        }
 
        return -EINVAL;
@@ -428,8 +427,7 @@ static int ep93xxfb_alloc_videomem(struct fb_info *info)
        /* Maximum 16bpp -> used memory is maximum x*y*2 bytes */
        fb_size = EP93XXFB_MAX_XRES * EP93XXFB_MAX_YRES * 2;
 
-       virt_addr = dma_alloc_writecombine(info->dev, fb_size,
-                                          &phys_addr, GFP_KERNEL);
+       virt_addr = dma_alloc_wc(info->dev, fb_size, &phys_addr, GFP_KERNEL);
        if (!virt_addr)
                return -ENOMEM;
 
index b63d55f481fae421cbea5cb71466401835e23a3d..1a242b1338e9a75ff4436e8532f64a37124055cd 100644 (file)
@@ -1185,8 +1185,8 @@ static int gbefb_probe(struct platform_device *p_dev)
        } else {
                /* try to allocate memory with the classical allocator
                 * this has high chance to fail on low memory machines */
-               gbe_mem = dma_alloc_writecombine(NULL, gbe_mem_size,
-                                                &gbe_dma_addr, GFP_KERNEL);
+               gbe_mem = dma_alloc_wc(NULL, gbe_mem_size, &gbe_dma_addr,
+                                      GFP_KERNEL);
                if (!gbe_mem) {
                        printk(KERN_ERR "gbefb: couldn't allocate framebuffer memory\n");
                        ret = -ENOMEM;
@@ -1238,7 +1238,7 @@ static int gbefb_probe(struct platform_device *p_dev)
 out_gbe_unmap:
        arch_phys_wc_del(par->wc_cookie);
        if (gbe_dma_addr)
-               dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+               dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
 out_tiles_free:
        dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
                          (void *)gbe_tiles.cpu, gbe_tiles.dma);
@@ -1259,7 +1259,7 @@ static int gbefb_remove(struct platform_device* p_dev)
        gbe_turn_off();
        arch_phys_wc_del(par->wc_cookie);
        if (gbe_dma_addr)
-               dma_free_writecombine(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
+               dma_free_wc(NULL, gbe_mem_size, gbe_mem, gbe_mem_phys);
        dma_free_coherent(NULL, GBE_TLB_SIZE * sizeof(uint16_t),
                          (void *)gbe_tiles.cpu, gbe_tiles.dma);
        release_mem_region(GBE_BASE, sizeof(struct sgi_gbe));
index bb2f1e866020199d31b8ba0b1940cb9e6b87923f..76b6a7784b06c7c752ba79862c26b2edcf51f932 100644 (file)
@@ -937,8 +937,8 @@ static int imxfb_probe(struct platform_device *pdev)
        }
 
        fbi->map_size = PAGE_ALIGN(info->fix.smem_len);
-       info->screen_base = dma_alloc_writecombine(&pdev->dev, fbi->map_size,
-                                                  &fbi->map_dma, GFP_KERNEL);
+       info->screen_base = dma_alloc_wc(&pdev->dev, fbi->map_size,
+                                        &fbi->map_dma, GFP_KERNEL);
 
        if (!info->screen_base) {
                dev_err(&pdev->dev, "Failed to allocate video RAM: %d\n", ret);
@@ -1005,8 +1005,8 @@ failed_cmap:
        if (pdata && pdata->exit)
                pdata->exit(fbi->pdev);
 failed_platform_init:
-       dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-                             fbi->map_dma);
+       dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+                   fbi->map_dma);
 failed_map:
        iounmap(fbi->regs);
 failed_ioremap:
@@ -1041,8 +1041,8 @@ static int imxfb_remove(struct platform_device *pdev)
        kfree(info->pseudo_palette);
        framebuffer_release(info);
 
-       dma_free_writecombine(&pdev->dev, fbi->map_size, info->screen_base,
-                             fbi->map_dma);
+       dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base,
+                   fbi->map_dma);
 
        iounmap(fbi->regs);
        release_mem_region(res->start, resource_size(res));
index 7947634ee6b06c6f63707c87ef754d22342e975c..f91b1db262b04a2b131911009e3c69914b254013 100644 (file)
@@ -1336,9 +1336,8 @@ static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len,
        int retval = 0;
        dma_addr_t addr;
 
-       fbi->screen_base = dma_alloc_writecombine(fbi->device,
-                                                 mem_len,
-                                                 &addr, GFP_DMA | GFP_KERNEL);
+       fbi->screen_base = dma_alloc_wc(fbi->device, mem_len, &addr,
+                                       GFP_DMA | GFP_KERNEL);
 
        if (!fbi->screen_base) {
                dev_err(fbi->device, "Cannot allocate %u bytes framebuffer memory\n",
@@ -1378,8 +1377,8 @@ err0:
  */
 static int mx3fb_unmap_video_memory(struct fb_info *fbi)
 {
-       dma_free_writecombine(fbi->device, fbi->fix.smem_len,
-                             fbi->screen_base, fbi->fix.smem_start);
+       dma_free_wc(fbi->device, fbi->fix.smem_len, fbi->screen_base,
+                   fbi->fix.smem_start);
 
        fbi->screen_base = NULL;
        mutex_lock(&fbi->mm_lock);
index 389fa2cbb713108623ca07b67941572ccdc3fa85..6680edae4696d1b0dc4dff547dda27ca8c159ab5 100644 (file)
@@ -396,8 +396,8 @@ static int nuc900fb_map_video_memory(struct fb_info *info)
        dev_dbg(fbi->dev, "nuc900fb_map_video_memory(fbi=%p) map_size %lu\n",
                fbi, map_size);
 
-       info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-                                                       &map_dma, GFP_KERNEL);
+       info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+                                        GFP_KERNEL);
 
        if (!info->screen_base)
                return -ENOMEM;
@@ -411,8 +411,8 @@ static int nuc900fb_map_video_memory(struct fb_info *info)
 static inline void nuc900fb_unmap_video_memory(struct fb_info *info)
 {
        struct nuc900fb_info *fbi = info->par;
-       dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-                             info->screen_base, info->fix.smem_start);
+       dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+                   info->screen_base, info->fix.smem_start);
 }
 
 static irqreturn_t nuc900fb_irqhandler(int irq, void *dev_id)
index 6efa2591eaa8dfa6d72a22cd18eaf7f07c36174d..e3d9b9ea5498e0f840c5189bd1401200494d9ae2 100644 (file)
@@ -612,8 +612,8 @@ static void lcdc_dma_handler(u16 status, void *data)
 
 static int alloc_palette_ram(void)
 {
-       lcdc.palette_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-               MAX_PALETTE_SIZE, &lcdc.palette_phys, GFP_KERNEL);
+       lcdc.palette_virt = dma_alloc_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
+                                        &lcdc.palette_phys, GFP_KERNEL);
        if (lcdc.palette_virt == NULL) {
                dev_err(lcdc.fbdev->dev, "failed to alloc palette memory\n");
                return -ENOMEM;
@@ -625,8 +625,8 @@ static int alloc_palette_ram(void)
 
 static void free_palette_ram(void)
 {
-       dma_free_writecombine(lcdc.fbdev->dev, MAX_PALETTE_SIZE,
-                       lcdc.palette_virt, lcdc.palette_phys);
+       dma_free_wc(lcdc.fbdev->dev, MAX_PALETTE_SIZE, lcdc.palette_virt,
+                   lcdc.palette_phys);
 }
 
 static int alloc_fbmem(struct omapfb_mem_region *region)
@@ -642,8 +642,8 @@ static int alloc_fbmem(struct omapfb_mem_region *region)
        if (region->size > frame_size)
                frame_size = region->size;
        lcdc.vram_size = frame_size;
-       lcdc.vram_virt = dma_alloc_writecombine(lcdc.fbdev->dev,
-                       lcdc.vram_size, &lcdc.vram_phys, GFP_KERNEL);
+       lcdc.vram_virt = dma_alloc_wc(lcdc.fbdev->dev, lcdc.vram_size,
+                                     &lcdc.vram_phys, GFP_KERNEL);
        if (lcdc.vram_virt == NULL) {
                dev_err(lcdc.fbdev->dev, "unable to allocate FB DMA memory\n");
                return -ENOMEM;
@@ -660,8 +660,8 @@ static int alloc_fbmem(struct omapfb_mem_region *region)
 
 static void free_fbmem(void)
 {
-       dma_free_writecombine(lcdc.fbdev->dev, lcdc.vram_size,
-                             lcdc.vram_virt, lcdc.vram_phys);
+       dma_free_wc(lcdc.fbdev->dev, lcdc.vram_size, lcdc.vram_virt,
+                   lcdc.vram_phys);
 }
 
 static int setup_fbmem(struct omapfb_mem_desc *req_md)
index efb57c059997641baed972fe2b25fd85715cf291..def3a501acd64484342f2315c9b32fe697b166a5 100644 (file)
@@ -680,8 +680,8 @@ static int pxa168fb_probe(struct platform_device *pdev)
         */
        info->fix.smem_len = PAGE_ALIGN(DEFAULT_FB_SIZE);
 
-       info->screen_base = dma_alloc_writecombine(fbi->dev, info->fix.smem_len,
-                                               &fbi->fb_start_dma, GFP_KERNEL);
+       info->screen_base = dma_alloc_wc(fbi->dev, info->fix.smem_len,
+                                        &fbi->fb_start_dma, GFP_KERNEL);
        if (info->screen_base == NULL) {
                ret = -ENOMEM;
                goto failed_free_info;
@@ -804,8 +804,8 @@ static int pxa168fb_remove(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
 
-       dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-                               info->screen_base, info->fix.smem_start);
+       dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+                   info->screen_base, info->fix.smem_start);
 
        clk_disable(fbi->clk);
 
index 33b2bb315a2a462daefa5bc2c3181806747dc919..2c0487f4f805cf7d4c9153d4c966f7cc67c45c81 100644 (file)
@@ -2446,8 +2446,8 @@ static int pxafb_remove(struct platform_device *dev)
 
        free_pages_exact(fbi->video_mem, fbi->video_mem_size);
 
-       dma_free_writecombine(&dev->dev, fbi->dma_buff_size,
-                       fbi->dma_buff, fbi->dma_buff_phys);
+       dma_free_wc(&dev->dev, fbi->dma_buff_size, fbi->dma_buff,
+                   fbi->dma_buff_phys);
 
        iounmap(fbi->mmio_base);
 
index f72dd12456f962eba79eec43fb706be9dc1cdd39..5f4f696c2ecfd5e677575675be2969b1ba09947d 100644 (file)
@@ -1105,8 +1105,7 @@ static int s3c_fb_alloc_memory(struct s3c_fb *sfb, struct s3c_fb_win *win)
 
        dev_dbg(sfb->dev, "want %u bytes for window\n", size);
 
-       fbi->screen_base = dma_alloc_writecombine(sfb->dev, size,
-                                                 &map_dma, GFP_KERNEL);
+       fbi->screen_base = dma_alloc_wc(sfb->dev, size, &map_dma, GFP_KERNEL);
        if (!fbi->screen_base)
                return -ENOMEM;
 
@@ -1131,8 +1130,8 @@ static void s3c_fb_free_memory(struct s3c_fb *sfb, struct s3c_fb_win *win)
        struct fb_info *fbi = win->fbinfo;
 
        if (fbi->screen_base)
-               dma_free_writecombine(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
-                             fbi->screen_base, fbi->fix.smem_start);
+               dma_free_wc(sfb->dev, PAGE_ALIGN(fbi->fix.smem_len),
+                           fbi->screen_base, fbi->fix.smem_start);
 }
 
 /**
index d6704add1601c184d217dfbe19dcaa9d91aace27..0dd86be36afbb5bdff4d268ae58c17337a98c6b9 100644 (file)
@@ -645,8 +645,8 @@ static int s3c2410fb_map_video_memory(struct fb_info *info)
 
        dprintk("map_video_memory(fbi=%p) map_size %u\n", fbi, map_size);
 
-       info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
-                                                  &map_dma, GFP_KERNEL);
+       info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
+                                        GFP_KERNEL);
 
        if (info->screen_base) {
                /* prevent initial garbage on screen */
@@ -667,8 +667,8 @@ static inline void s3c2410fb_unmap_video_memory(struct fb_info *info)
 {
        struct s3c2410fb_info *fbi = info->par;
 
-       dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-                             info->screen_base, info->fix.smem_start);
+       dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+                   info->screen_base, info->fix.smem_start);
 }
 
 static inline void modify_gpio(void __iomem *reg,
index dcf774c1588965032eb93ac3b18cc822f419de18..fc2aaa5aca2347e705c6eb1623ec188b5262e498 100644 (file)
@@ -567,8 +567,8 @@ static int sa1100fb_mmap(struct fb_info *info,
 
        if (off < info->fix.smem_len) {
                vma->vm_pgoff += 1; /* skip over the palette */
-               return dma_mmap_writecombine(fbi->dev, vma, fbi->map_cpu,
-                                            fbi->map_dma, fbi->map_size);
+               return dma_mmap_wc(fbi->dev, vma, fbi->map_cpu, fbi->map_dma,
+                                  fbi->map_size);
        }
 
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -1099,8 +1099,8 @@ static int sa1100fb_map_video_memory(struct sa1100fb_info *fbi)
         * of the framebuffer.
         */
        fbi->map_size = PAGE_ALIGN(fbi->fb.fix.smem_len + PAGE_SIZE);
-       fbi->map_cpu = dma_alloc_writecombine(fbi->dev, fbi->map_size,
-                                             &fbi->map_dma, GFP_KERNEL);
+       fbi->map_cpu = dma_alloc_wc(fbi->dev, fbi->map_size, &fbi->map_dma,
+                                   GFP_KERNEL);
 
        if (fbi->map_cpu) {
                fbi->fb.screen_base = fbi->map_cpu + PAGE_SIZE;
index 12eab503efd1bb8793fff55eca83649e1f564426..dc4305b407bf6756791ec9b31e7929c1fe5c3ca8 100644 (file)
@@ -257,7 +257,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
                return NULL;
 
        res->name = "System RAM";
-       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
        ret = allocate_resource(&iomem_resource, res,
                                size, 0, -1,
index 711172450da642ccdcf4ac7dcceb7546faf66fad..bbb2ad78377020ac85158fb61df067aceadaafe5 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1056,6 +1056,7 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
 int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
        struct file *file = vma->vm_file;
+       int error;
 
        /*
         * We pass NO_SECTOR to dax_radix_entry() because we expect that a
@@ -1065,7 +1066,13 @@ int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         * saves us from having to make a call to get_block() here to look
         * up the sector.
         */
-       dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
+       error = dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false,
+                       true);
+
+       if (error == -ENOMEM)
+               return VM_FAULT_OOM;
+       if (error)
+               return VM_FAULT_SIGBUS;
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
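
Both the DAX hunk above and the ocfs2 hunk further down stop ignoring an error in a page-fault path and translate the negative errno into the VM_FAULT_* codes a fault handler must return. The translation amounts to the following sketch (a hypothetical helper, not the code used in either file):

#include <linux/errno.h>
#include <linux/mm.h>

/* Map a kernel errno onto fault-handler return codes: -ENOMEM becomes
 * VM_FAULT_OOM, any other failure is reported as VM_FAULT_SIGBUS. */
static int errno_to_vm_fault(int err)
{
        if (!err)
                return VM_FAULT_NOPAGE;
        if (err == -ENOMEM)
                return VM_FAULT_OOM;
        return VM_FAULT_SIGBUS;
}
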
index e032a0423e351cabfd6fe6156cdfc9c53ebff216..4098acc701c3e66d5e8a229ed49828d69997655f 100644 (file)
@@ -390,6 +390,7 @@ data_copy:
                *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
                if (*err < 0)
                        break;
+               bh = bh->b_this_page;
        }
        if (!*err)
                *err = block_commit_write(pagep[0], from, from + replaced_size);
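
The ext4 move_extent fix above advances bh to the next buffer head on each loop iteration; b_this_page links a page's buffer heads into a circular list, so without that advance every iteration would call ext4_get_block() on the same first buffer. Walking that circular list looks roughly like this (helper name hypothetical):

#include <linux/buffer_head.h>

/* Visit every buffer head attached to a page: b_this_page forms a
 * circular singly linked list that eventually points back at 'head'. */
static void for_each_page_buffer(struct buffer_head *head,
                                 void (*fn)(struct buffer_head *bh))
{
        struct buffer_head *bh = head;

        do {
                fn(bh);
                bh = bh->b_this_page;
        } while (bh != head);
}
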
index d211b8e18566719c873838268217b143966e5965..30c4c9ebb693faaecf249df4c913c01bbaa58f20 100644 (file)
@@ -843,9 +843,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 
                pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
                          __func__, ret);
-               /* Might as well let the VFS know */
-               d_instantiate(new_dentry, d_inode(old_dentry));
-               ihold(d_inode(old_dentry));
+               /*
+                * We can't keep the target in dcache after that.
+                * For one thing, we can't afford dentry aliases for directories.
+                * For another, if there was a victim, we _can't_ set new inode
+                * for that sucker and we have to trigger mount eviction - the
+                * caller won't do it on its own since we are returning an error.
+                */
+               d_invalidate(new_dentry);
                new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
                return ret;
        }
index 26c2de2de13fd0ce7bb55287c58e268cf4bf37e6..b7f8eaeea5d83d8a1cb66e2d697de5341e7361cd 100644 (file)
@@ -633,7 +633,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                                d_rehash(newdent);
                } else {
                        spin_lock(&dentry->d_lock);
-                       NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE;
+                       NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
                        spin_unlock(&dentry->d_lock);
                }
        } else {
index 9581d190f6e12346e70226c93458df0a1791abe0..77ebc2bc1cca112056501fe4dc160d48cc7069cd 100644 (file)
@@ -147,6 +147,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = ocfs2_inode_lock(inode, &di_bh, 1);
        if (ret < 0) {
                mlog_errno(ret);
+               if (ret == -ENOMEM)
+                       ret = VM_FAULT_OOM;
+               else
+                       ret = VM_FAULT_SIGBUS;
                goto out;
        }
 
index 594f7e63b432427fd5b6448afa0a75eb0b71d558..be5568839442d1ab50bf5cae293b7b2b133b525e 100644 (file)
@@ -1109,27 +1109,10 @@ xlog_verify_head(
        bool                    tmp_wrapped;
 
        /*
-        * Search backwards through the log looking for the log record header
-        * block. This wraps all the way back around to the head so something is
-        * seriously wrong if we can't find it.
-        */
-       found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-                                     rhead, wrapped);
-       if (found < 0)
-               return found;
-       if (!found) {
-               xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-               return -EIO;
-       }
-
-       *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-       /*
-        * Now that we have a tail block, check the head of the log for torn
-        * writes. Search again until we hit the tail or the maximum number of
-        * log record I/Os that could have been in flight at one time. Use a
-        * temporary buffer so we don't trash the rhead/bp pointer from the
-        * call above.
+        * Check the head of the log for torn writes. Search backwards from the
+        * head until we hit the tail or the maximum number of log record I/Os
+        * that could have been in flight at one time. Use a temporary buffer so
+        * we don't trash the rhead/bp pointers from the caller.
         */
        tmp_bp = xlog_get_bp(log, 1);
        if (!tmp_bp)
@@ -1215,6 +1198,115 @@ xlog_verify_head(
        return error;
 }
 
+/*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+       struct xlog             *log,
+       xfs_daddr_t             *head_blk,
+       xfs_daddr_t             *tail_blk,
+       struct xlog_rec_header  *rhead,
+       xfs_daddr_t             rhead_blk,
+       struct xfs_buf          *bp,
+       bool                    *clean)
+{
+       struct xlog_op_header   *op_head;
+       xfs_daddr_t             umount_data_blk;
+       xfs_daddr_t             after_umount_blk;
+       int                     hblks;
+       int                     error;
+       char                    *offset;
+
+       *clean = false;
+
+       /*
+        * Look for unmount record. If we find it, then we know there was a
+        * clean unmount. Since 'i' could be the last block in the physical
+        * log, we convert to a log block before comparing to the head_blk.
+        *
+        * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+        * below. We won't want to clear the unmount record if there is one, so
+        * we pass the lsn of the unmount record rather than the block after it.
+        */
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               int     h_size = be32_to_cpu(rhead->h_size);
+               int     h_version = be32_to_cpu(rhead->h_version);
+
+               if ((h_version & XLOG_VERSION_2) &&
+                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
+                               hblks++;
+               } else {
+                       hblks = 1;
+               }
+       } else {
+               hblks = 1;
+       }
+       after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+       after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+       if (*head_blk == after_umount_blk &&
+           be32_to_cpu(rhead->h_num_logops) == 1) {
+               umount_data_blk = rhead_blk + hblks;
+               umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               if (error)
+                       return error;
+
+               op_head = (struct xlog_op_header *)offset;
+               if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+                       /*
+                        * Set tail and last sync so that newly written log
+                        * records will point recovery to after the current
+                        * unmount record.
+                        */
+                       xlog_assign_atomic_lsn(&log->l_tail_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
+                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+                                       log->l_curr_cycle, after_umount_blk);
+                       *tail_blk = after_umount_blk;
+
+                       *clean = true;
+               }
+       }
+
+       return 0;
+}
+
+static void
+xlog_set_state(
+       struct xlog             *log,
+       xfs_daddr_t             head_blk,
+       struct xlog_rec_header  *rhead,
+       xfs_daddr_t             rhead_blk,
+       bool                    bump_cycle)
+{
+       /*
+        * Reset log values according to the state of the log when we
+        * crashed.  In the case where head_blk == 0, we bump curr_cycle
+        * one because the next write starts a new cycle rather than
+        * continuing the cycle of the last good log record.  At this
+        * point we have guaranteed that all partial log records have been
+        * accounted for.  Therefore, we know that the last good log record
+        * written was complete and ended exactly on the end boundary
+        * of the physical log.
+        */
+       log->l_prev_block = rhead_blk;
+       log->l_curr_block = (int)head_blk;
+       log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+       if (bump_cycle)
+               log->l_curr_cycle++;
+       atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+       xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
+       xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+                                       BBTOB(log->l_curr_block));
+}
+
 /*
  * Find the sync block number or the tail of the log.
  *
@@ -1238,22 +1330,20 @@ xlog_find_tail(
        xfs_daddr_t             *tail_blk)
 {
        xlog_rec_header_t       *rhead;
-       xlog_op_header_t        *op_head;
        char                    *offset = NULL;
        xfs_buf_t               *bp;
        int                     error;
-       xfs_daddr_t             umount_data_blk;
-       xfs_daddr_t             after_umount_blk;
        xfs_daddr_t             rhead_blk;
        xfs_lsn_t               tail_lsn;
-       int                     hblks;
        bool                    wrapped = false;
+       bool                    clean = false;
 
        /*
         * Find previous log record
         */
        if ((error = xlog_find_head(log, head_blk)))
                return error;
+       ASSERT(*head_blk < INT_MAX);
 
        bp = xlog_get_bp(log, 1);
        if (!bp)
@@ -1271,99 +1361,74 @@ xlog_find_tail(
        }
 
        /*
-        * Trim the head block back to skip over torn records. We can have
-        * multiple log I/Os in flight at any time, so we assume CRC failures
-        * back through the previous several records are torn writes and skip
-        * them.
+        * Search backwards through the log looking for the log record header
+        * block. This wraps all the way back around to the head so something is
+        * seriously wrong if we can't find it.
         */
-       ASSERT(*head_blk < INT_MAX);
-       error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-                                &rhead, &wrapped);
-       if (error)
-               goto done;
+       error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+                                     &rhead_blk, &rhead, &wrapped);
+       if (error < 0)
+               return error;
+       if (!error) {
+               xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+               return -EIO;
+       }
+       *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
        /*
-        * Reset log values according to the state of the log when we
-        * crashed.  In the case where head_blk == 0, we bump curr_cycle
-        * one because the next write starts a new cycle rather than
-        * continuing the cycle of the last good log record.  At this
-        * point we have guaranteed that all partial log records have been
-        * accounted for.  Therefore, we know that the last good log record
-        * written was complete and ended exactly on the end boundary
-        * of the physical log.
+        * Set the log state based on the current head record.
         */
-       log->l_prev_block = rhead_blk;
-       log->l_curr_block = (int)*head_blk;
-       log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-       if (wrapped)
-               log->l_curr_cycle++;
-       atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-       atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-       xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-                                       BBTOB(log->l_curr_block));
-       xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-                                       BBTOB(log->l_curr_block));
+       xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+       tail_lsn = atomic64_read(&log->l_tail_lsn);
 
        /*
-        * Look for unmount record.  If we find it, then we know there
-        * was a clean unmount.  Since 'i' could be the last block in
-        * the physical log, we convert to a log block before comparing
-        * to the head_blk.
+        * Look for an unmount record at the head of the log. This sets the log
+        * state to determine whether recovery is necessary.
+        */
+       error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+                                      rhead_blk, bp, &clean);
+       if (error)
+               goto done;
+
+       /*
+        * Verify the log head if the log is not clean (e.g., we have anything
+        * but an unmount record at the head). This uses CRC verification to
+        * detect and trim torn writes. If discovered, CRC failures are
+        * considered torn writes and the log head is trimmed accordingly.
         *
-        * Save the current tail lsn to use to pass to
-        * xlog_clear_stale_blocks() below.  We won't want to clear the
-        * unmount record if there is one, so we pass the lsn of the
-        * unmount record rather than the block after it.
+        * Note that we can only run CRC verification when the log is dirty
+        * because there's no guarantee that the log data behind an unmount
+        * record is compatible with the current architecture.
         */
-       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               int     h_size = be32_to_cpu(rhead->h_size);
-               int     h_version = be32_to_cpu(rhead->h_version);
+       if (!clean) {
+               xfs_daddr_t     orig_head = *head_blk;
 
-               if ((h_version & XLOG_VERSION_2) &&
-                   (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-                       hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-                       if (h_size % XLOG_HEADER_CYCLE_SIZE)
-                               hblks++;
-               } else {
-                       hblks = 1;
-               }
-       } else {
-               hblks = 1;
-       }
-       after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-       after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-       tail_lsn = atomic64_read(&log->l_tail_lsn);
-       if (*head_blk == after_umount_blk &&
-           be32_to_cpu(rhead->h_num_logops) == 1) {
-               umount_data_blk = rhead_blk + hblks;
-               umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-               error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+               error = xlog_verify_head(log, head_blk, tail_blk, bp,
+                                        &rhead_blk, &rhead, &wrapped);
                if (error)
                        goto done;
 
-               op_head = (xlog_op_header_t *)offset;
-               if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-                       /*
-                        * Set tail and last sync so that newly written
-                        * log records will point recovery to after the
-                        * current unmount record.
-                        */
-                       xlog_assign_atomic_lsn(&log->l_tail_lsn,
-                                       log->l_curr_cycle, after_umount_blk);
-                       xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-                                       log->l_curr_cycle, after_umount_blk);
-                       *tail_blk = after_umount_blk;
-
-                       /*
-                        * Note that the unmount was clean. If the unmount
-                        * was not clean, we need to know this to rebuild the
-                        * superblock counters from the perag headers if we
-                        * have a filesystem using non-persistent counters.
-                        */
-                       log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+               /* update in-core state again if the head changed */
+               if (*head_blk != orig_head) {
+                       xlog_set_state(log, *head_blk, rhead, rhead_blk,
+                                      wrapped);
+                       tail_lsn = atomic64_read(&log->l_tail_lsn);
+                       error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+                                                      rhead, rhead_blk, bp,
+                                                      &clean);
+                       if (error)
+                               goto done;
                }
        }
 
+       /*
+        * Note that the unmount was clean. If the unmount was not clean, we
+        * need to know this to rebuild the superblock counters from the perag
+        * headers if we have a filesystem using non-persistent counters.
+        */
+       if (clean)
+               log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
        /*
         * Make sure that there are no blocks in front of the head
         * with the same cycle number as the head.  This can happen
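
The xfs_log_recover.c rework above factors the unmount-record check into xlog_check_unmount_rec(), the in-core state reset into xlog_set_state(), and runs the CRC-based torn-write verification only when the log is not already clean. Condensed, the new xlog_find_tail() flow is (a restatement of the hunk above with error handling and buffer management elided, not standalone code):

/* 1. Find the head, then search backwards for the last record header. */
xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
                      &rhead_blk, &rhead, &wrapped);

/* 2. Derive the provisional tail and reset the in-core log state. */
*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);

/* 3. Is the head an unmount record, i.e. was the log cleanly unmounted? */
xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, rhead_blk, bp, &clean);

/* 4. Only a dirty log gets CRC verification of its head; if torn writes
 *    were trimmed, repeat steps 2 and 3 against the new head block. */
if (!clean)
        xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, &rhead,
                         &wrapped);

if (clean)
        log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
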
index 39e1cb201b8eaa00ffe759ce7147cc8b3647fb5d..35a52a880b2f812b382c905bf44df50290aa4f0c 100644 (file)
@@ -119,11 +119,6 @@ static __always_inline bool virt_spin_lock(struct qspinlock *lock)
 }
 #endif
 
-/*
- * Initializier
- */
-#define        __ARCH_SPIN_LOCK_UNLOCKED       { ATOMIC_INIT(0) }
-
 /*
  * Remapping spinlock architecture specific functions to the corresponding
  * queued spinlock functions.
index 85f888e86761e1b71bd0de2a891a160194e256d7..034acd0c4956b59932f650e85612a9397236dc66 100644 (file)
@@ -32,6 +32,11 @@ typedef struct qspinlock {
        atomic_t        val;
 } arch_spinlock_t;
 
+/*
+ * Initializer
+ */
+#define        __ARCH_SPIN_LOCK_UNLOCKED       { ATOMIC_INIT(0) }
+
 /*
  * Bitfields in the atomic value:
  *
index c4bd0e2c173c011e37f6eef7acfb9bc3920fcca6..772c784ba76301dbceca9f3c0991c626a893e076 100644 (file)
        .rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {           \
                VMLINUX_SYMBOL(__start_rodata) = .;                     \
                *(.rodata) *(.rodata.*)                                 \
+               *(.data..ro_after_init) /* Read only after init */      \
                *(__vermagic)           /* Kernel version magic */      \
                . = ALIGN(8);                                           \
                VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;         \
index cb68888241084e33af2d7e11753e048431cf7837..88bc64f00bb53cb01fe60ca87dfafd7c9c0f0f4a 100644 (file)
@@ -320,11 +320,6 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
        struct bvec_iter iter = bio->bi_iter;
        int idx;
 
-       if (!bio_flagged(bio, BIO_CLONED)) {
-               *bv = bio->bi_io_vec[bio->bi_vcnt - 1];
-               return;
-       }
-
        if (unlikely(!bio_multiple_segments(bio))) {
                *bv = bio_iovec(bio);
                return;
index 17e7e82d2aa758f9888419a9c03aa4059e16b247..1be04f8c563a0c60bdfca72a36c120ec96ef327c 100644 (file)
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
+/*
+ * __read_mostly is used to keep rarely changing variables out of frequently
+ * updated cachelines. If an architecture doesn't support it, ignore the
+ * hint.
+ */
 #ifndef __read_mostly
 #define __read_mostly
 #endif
 
+/*
+ * __ro_after_init is used to mark things that are read-only after init (i.e.
+ * after mark_rodata_ro() has been called). These are effectively read-only,
+ * but may get written to during init, so can't live in .rodata (via "const").
+ */
+#ifndef __ro_after_init
+#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
+#endif
+
 #ifndef ____cacheline_aligned
 #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
 #endif
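
The new __ro_after_init annotation above puts a variable into .data..ro_after_init, which the vmlinux.lds.h hunk earlier in this section folds into the rodata region that mark_rodata_ro() write-protects. Typical use is a variable written exactly once during boot; a sketch with hypothetical names:

#include <linux/cache.h>
#include <linux/init.h>

/* Written only from __init code; read-only once mark_rodata_ro() runs. */
static unsigned long featurex_mask __ro_after_init;

static int __init featurex_setup(char *str)
{
        featurex_mask = 1;      /* hypothetical parsing of 'str' */
        return 1;
}
__setup("featurex=", featurex_setup);
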
index 48f5aab117ae12625d041cd18555031e87c178fc..a27f4f17c382b1cbf8884ac33b8134dafe624b2d 100644 (file)
@@ -263,8 +263,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * In contrast to ACCESS_ONCE these two macros will also work on aggregate
  * data types like structs or unions. If the size of the accessed data
  * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
- * READ_ONCE() and WRITE_ONCE()  will fall back to memcpy and print a
- * compile-time warning.
+ * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There are at
+ * least two memcpy()s: one for the __builtin_memcpy() and one for the
+ * macro's copy through the '__u' union allocated on the stack.
  *
  * Their two major use cases are: (1) Mediating communication between
  * process-level code and irq/NMI handlers, all running on the same CPU,
index 75857cda38e989e5a44150c5abb9bd1bd4872957..5e45cf930a3f53fc7071f42a5759fe3dd5fd6a31 100644 (file)
@@ -386,7 +386,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
        if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
                return;
 
-       if (!ops->free)
+       if (!ops->free || !cpu_addr)
                return;
 
        debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
@@ -641,31 +641,40 @@ static inline void dmam_release_declared_memory(struct device *dev)
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
-static inline void *dma_alloc_writecombine(struct device *dev, size_t size,
-                                          dma_addr_t *dma_addr, gfp_t gfp)
+static inline void *dma_alloc_wc(struct device *dev, size_t size,
+                                dma_addr_t *dma_addr, gfp_t gfp)
 {
        DEFINE_DMA_ATTRS(attrs);
        dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
        return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs);
 }
+#ifndef dma_alloc_writecombine
+#define dma_alloc_writecombine dma_alloc_wc
+#endif
 
-static inline void dma_free_writecombine(struct device *dev, size_t size,
-                                        void *cpu_addr, dma_addr_t dma_addr)
+static inline void dma_free_wc(struct device *dev, size_t size,
+                              void *cpu_addr, dma_addr_t dma_addr)
 {
        DEFINE_DMA_ATTRS(attrs);
        dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
        return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs);
 }
+#ifndef dma_free_writecombine
+#define dma_free_writecombine dma_free_wc
+#endif
 
-static inline int dma_mmap_writecombine(struct device *dev,
-                                       struct vm_area_struct *vma,
-                                       void *cpu_addr, dma_addr_t dma_addr,
-                                       size_t size)
+static inline int dma_mmap_wc(struct device *dev,
+                             struct vm_area_struct *vma,
+                             void *cpu_addr, dma_addr_t dma_addr,
+                             size_t size)
 {
        DEFINE_DMA_ATTRS(attrs);
        dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
        return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
 }
+#ifndef dma_mmap_writecombine
+#define dma_mmap_writecombine dma_mmap_wc
+#endif
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
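
The dma-mapping.h hunk above renames the write-combining helpers to dma_alloc_wc(), dma_mmap_wc() and dma_free_wc(), keeping the old dma_*_writecombine() names as compatibility defines; that is the rename all of the framebuffer-driver hunks earlier in this section carry out. The usual allocate/mmap/free life cycle with the new names, sketched for a hypothetical driver:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* dev, vma, size and the returned addresses come from the surrounding
 * driver; this only shows the three write-combine helpers together. */
static void *fbbuf_alloc(struct device *dev, size_t size, dma_addr_t *dma)
{
        return dma_alloc_wc(dev, size, dma, GFP_KERNEL);
}

static int fbbuf_mmap(struct device *dev, struct vm_area_struct *vma,
                      void *cpu_addr, dma_addr_t dma, size_t size)
{
        return dma_mmap_wc(dev, vma, cpu_addr, dma, size);
}

static void fbbuf_free(struct device *dev, size_t size, void *cpu_addr,
                       dma_addr_t dma)
{
        dma_free_wc(dev, size, cpu_addr, dma);
}
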
index b449f378f995ae647077f521d9f7af3af9480a70..aedb254abc37204a091b790eedfde857dc22155f 100644 (file)
@@ -142,6 +142,10 @@ void prepare_namespace(void);
 void __init load_default_modules(void);
 int __init init_rootfs(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
 extern void (*late_time_init)(void);
 
 extern bool initcall_debug;
index 24bea087e7af8083b3c81596a289d70530c64285..afb45597fb5f0d3aa19f6bb9fd0818995ae6623e 100644 (file)
@@ -20,6 +20,7 @@ struct resource {
        resource_size_t end;
        const char *name;
        unsigned long flags;
+       unsigned long desc;
        struct resource *parent, *sibling, *child;
 };
 
@@ -49,12 +50,19 @@ struct resource {
 #define IORESOURCE_WINDOW      0x00200000      /* forwarded by bridge */
 #define IORESOURCE_MUXED       0x00400000      /* Resource is software muxed */
 
+#define IORESOURCE_EXT_TYPE_BITS 0x01000000    /* Resource extended types */
+#define IORESOURCE_SYSRAM      0x01000000      /* System RAM (modifier) */
+
 #define IORESOURCE_EXCLUSIVE   0x08000000      /* Userland may not map this resource */
+
 #define IORESOURCE_DISABLED    0x10000000
 #define IORESOURCE_UNSET       0x20000000      /* No address assigned yet */
 #define IORESOURCE_AUTO                0x40000000
 #define IORESOURCE_BUSY                0x80000000      /* Driver has marked this resource busy */
 
+/* I/O resource extended types */
+#define IORESOURCE_SYSTEM_RAM          (IORESOURCE_MEM|IORESOURCE_SYSRAM)
+
 /* PnP IRQ specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IRQ_HIGHEDGE                (1<<0)
 #define IORESOURCE_IRQ_LOWEDGE         (1<<1)
@@ -105,6 +113,22 @@ struct resource {
 /* PCI control bits.  Shares IORESOURCE_BITS with above PCI ROM.  */
 #define IORESOURCE_PCI_FIXED           (1<<4)  /* Do not move resource */
 
+/*
+ * I/O Resource Descriptors
+ *
+ * Descriptors are used by walk_iomem_res_desc() and region_intersects()
+ * for searching a specific resource range in the iomem table.  Assign
+ * a new descriptor when a resource range supports the search interfaces.
+ * Otherwise, resource.desc must be set to IORES_DESC_NONE (0).
+ */
+enum {
+       IORES_DESC_NONE                         = 0,
+       IORES_DESC_CRASH_KERNEL                 = 1,
+       IORES_DESC_ACPI_TABLES                  = 2,
+       IORES_DESC_ACPI_NV_STORAGE              = 3,
+       IORES_DESC_PERSISTENT_MEMORY            = 4,
+       IORES_DESC_PERSISTENT_MEMORY_LEGACY     = 5,
+};
 
 /* helpers to define resources */
 #define DEFINE_RES_NAMED(_start, _size, _name, _flags)                 \
@@ -113,6 +137,7 @@ struct resource {
                .end = (_start) + (_size) - 1,                          \
                .name = (_name),                                        \
                .flags = (_flags),                                      \
+               .desc = IORES_DESC_NONE,                                \
        }
 
 #define DEFINE_RES_IO_NAMED(_start, _size, _name)                      \
@@ -170,6 +195,10 @@ static inline unsigned long resource_type(const struct resource *res)
 {
        return res->flags & IORESOURCE_TYPE_BITS;
 }
+static inline unsigned long resource_ext_type(const struct resource *res)
+{
+       return res->flags & IORESOURCE_EXT_TYPE_BITS;
+}
 /* True iff r1 completely contains r2 */
 static inline bool resource_contains(struct resource *r1, struct resource *r2)
 {
@@ -239,8 +268,8 @@ extern int
 walk_system_ram_res(u64 start, u64 end, void *arg,
                    int (*func)(u64, u64, void *));
 extern int
-walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
-              int (*func)(u64, u64, void *));
+walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
+                   void *arg, int (*func)(u64, u64, void *));
 
 /* True if any part of r1 overlaps r2 */
 static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
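
The ioport.h hunk above adds an extended resource type (IORESOURCE_SYSRAM, exposed as IORESOURCE_SYSTEM_RAM and already used by the Xen balloon hunk earlier), a per-resource desc field with the IORES_DESC_* descriptors, and replaces the name-based walk_iomem_res() with walk_iomem_res_desc(). A sketch of describing a range and walking it by descriptor (addresses and names hypothetical):

#include <linux/ioport.h>

static struct resource example_pmem_res = {
        .name  = "Persistent Memory",
        .start = 0x100000000ULL,        /* hypothetical range */
        .end   = 0x1ffffffffULL,
        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
        .desc  = IORES_DESC_PERSISTENT_MEMORY,
};

static int count_one(u64 start, u64 end, void *arg)
{
        (*(unsigned int *)arg)++;
        return 0;
}

/* Count iomem ranges by descriptor instead of by resource name. */
static unsigned int count_pmem_ranges(void)
{
        unsigned int n = 0;

        walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY,
                            IORESOURCE_MEM | IORESOURCE_BUSY,
                            0, -1, &n, count_one);
        return n;
}
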
index 4b9f85c963d0738e20ee7c89e2dcb28ba3cc1aaa..0fdc798e3ff795a9dffd75e36e90ce2b61141562 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/sched.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -13,7 +14,6 @@ struct vm_struct;
 
 #include <asm/kasan.h>
 #include <asm/pgtable.h>
-#include <linux/sched.h>
 
 extern unsigned char kasan_zero_page[PAGE_SIZE];
 extern pte_t kasan_zero_pte[PTRS_PER_PTE];
@@ -43,6 +43,8 @@ static inline void kasan_disable_current(void)
 
 void kasan_unpoison_shadow(const void *address, size_t size);
 
+void kasan_unpoison_task_stack(struct task_struct *task);
+
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
 
@@ -66,6 +68,8 @@ void kasan_free_shadow(const struct vm_struct *vm);
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
 
index 30cf4200ab40ee40fdae694291e36fe869b508bc..5356f4d661a721ba0446b1183e2a834f3bf3b56f 100644 (file)
@@ -113,17 +113,6 @@ extern void __list_del_entry(struct list_head *entry);
 extern void list_del(struct list_head *entry);
 #endif
 
-#ifdef CONFIG_DEBUG_LIST
-/*
- * See devm_memremap_pages() which wants DEBUG_LIST=y to assert if one
- * of the pages it allocates is ever passed to list_add()
- */
-extern void list_force_poison(struct list_head *entry);
-#else
-/* fallback to the less strict LIST_POISON* definitions */
-#define list_force_poison list_del
-#endif
-
 /**
  * list_replace - replace old entry by new one
  * @old : the element to be replaced
index 4dca42fd32f52d17326e436c4d9fcbd86a0e8d24..d026b190c53066d25753ce98f0d7c66d864a6c0a 100644 (file)
@@ -261,7 +261,6 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_init(void);
 extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
@@ -392,7 +391,6 @@ static inline void lockdep_on(void)
 # define lockdep_set_current_reclaim_state(g)  do { } while (0)
 # define lockdep_clear_current_reclaim_state() do { } while (0)
 # define lockdep_trace_alloc(g)                        do { } while (0)
-# define lockdep_init()                                do { } while (0)
 # define lockdep_info()                                do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
                do { (void)(name); (void)(key); } while (0)
index 516e149443397dcf712e4e62aca6fb3c78e542b6..2b6e22782699fc26b2f783c2519e0f4bf0c728d8 100644 (file)
@@ -387,7 +387,8 @@ enum {
        REGION_MIXED,
 };
 
-int region_intersects(resource_size_t offset, size_t size, const char *type);
+int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
+                     unsigned long desc);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
index f5c5a3fa2c8101cc37ea917d29dec30713ec0699..a9d8cab18b00ffe1cd91b71104c82ed6422392d9 100644 (file)
@@ -468,6 +468,7 @@ struct perf_event {
        int                             group_flags;
        struct perf_event               *group_leader;
        struct pmu                      *pmu;
+       void                            *pmu_private;
 
        enum perf_event_active_state    state;
        unsigned int                    attach_state;
index acfdbf353a0b5bc7cfeb6ae58fa7e6a6acfa16ba..be586c632a0c04da3887ae6a0cf28357df98ef5c 100644 (file)
@@ -134,9 +134,6 @@ extern void syscall_unregfunc(void);
                void *it_func;                                          \
                void *__data;                                           \
                                                                        \
-               if (!cpu_online(raw_smp_processor_id()))                \
-                       return;                                         \
-                                                                       \
                if (!(cond))                                            \
                        return;                                         \
                prercu;                                                 \
@@ -343,15 +340,19 @@ extern void syscall_unregfunc(void);
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)                                     \
-               __DECLARE_TRACE(name, void, , 1, void *__data, __data)
+       __DECLARE_TRACE(name, void, ,                                   \
+                       cpu_online(raw_smp_processor_id()),             \
+                       void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)                               \
-               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,   \
-                               PARAMS(void *__data, proto),            \
-                               PARAMS(__data, args))
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),              \
+                       cpu_online(raw_smp_processor_id()),             \
+                       PARAMS(void *__data, proto),                    \
+                       PARAMS(__data, args))
 
 #define DECLARE_TRACE_CONDITION(name, proto, args, cond)               \
-       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),              \
+                       cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \
                        PARAMS(void *__data, proto),                    \
                        PARAMS(__data, args))
 
index 317a1ed2f4acc7745c3e02955c60e8c1dc8eb7c3..9130dd5a184a25e7d6c6cf0b633d7d5c0c4fc0f6 100644 (file)
@@ -231,13 +231,13 @@ TRACE_EVENT(snd_soc_jack_report,
        TP_ARGS(jack, mask, val),
 
        TP_STRUCT__entry(
-               __string(       name,           jack->jack->name        )
+               __string(       name,           jack->jack->id          )
                __field(        int,            mask                    )
                __field(        int,            val                     )
        ),
 
        TP_fast_assign(
-               __assign_str(name, jack->jack->name);
+               __assign_str(name, jack->jack->id);
                __entry->mask = mask;
                __entry->val = val;
        ),
@@ -253,12 +253,12 @@ TRACE_EVENT(snd_soc_jack_notify,
        TP_ARGS(jack, val),
 
        TP_STRUCT__entry(
-               __string(       name,           jack->jack->name        )
+               __string(       name,           jack->jack->id          )
                __field(        int,            val                     )
        ),
 
        TP_fast_assign(
-               __assign_str(name, jack->jack->name);
+               __assign_str(name, jack->jack->id);
                __entry->val = val;
        ),
 
index 625b38f65764b34fe9d942339ef61a08af817564..a8e3a8c0d85a9173c7aa248b84e8e4e9fd01739f 100644 (file)
@@ -120,7 +120,7 @@ struct media_device_info {
 
 #define MEDIA_ENT_F_V4L2_SUBDEV_UNKNOWN        MEDIA_ENT_F_OLD_SUBDEV_BASE
 
-#ifndef __KERNEL__
+#if !defined(__KERNEL__) || defined(__NEED_MEDIA_LEGACY_API)
 
 /*
  * Legacy symbols used to avoid userspace compilation breakages
@@ -133,6 +133,10 @@ struct media_device_info {
 #define MEDIA_ENT_TYPE_MASK            0x00ff0000
 #define MEDIA_ENT_SUBTYPE_MASK         0x0000ffff
 
+/* End of the old subdev reserved numberspace */
+#define MEDIA_ENT_T_DEVNODE_UNKNOWN    (MEDIA_ENT_T_DEVNODE | \
+                                        MEDIA_ENT_SUBTYPE_MASK)
+
 #define MEDIA_ENT_T_DEVNODE            MEDIA_ENT_F_OLD_BASE
 #define MEDIA_ENT_T_DEVNODE_V4L                MEDIA_ENT_F_IO_V4L
 #define MEDIA_ENT_T_DEVNODE_FB         (MEDIA_ENT_T_DEVNODE + 2)
index 58c9e374704bb20cfff41fa92afcf7cafe498c32..7c27de4577eda65a15510af0d7f4d5cae59d57b8 100644 (file)
@@ -93,9 +93,6 @@ static int kernel_init(void *);
 extern void init_IRQ(void);
 extern void fork_init(void);
 extern void radix_tree_init(void);
-#ifndef CONFIG_DEBUG_RODATA
-static inline void mark_rodata_ro(void) { }
-#endif
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -499,11 +496,6 @@ asmlinkage __visible void __init start_kernel(void)
        char *command_line;
        char *after_dashes;
 
-       /*
-        * Need to run as early as possible, to initialize the
-        * lockdep hash:
-        */
-       lockdep_init();
        set_task_stack_end_magic(&init_task);
        smp_setup_processor_id();
        debug_objects_early_init();
@@ -929,6 +921,28 @@ static int try_to_run_init_process(const char *init_filename)
 
 static noinline void __init kernel_init_freeable(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+static bool rodata_enabled = true;
+static int __init set_debug_rodata(char *str)
+{
+       return strtobool(str, &rodata_enabled);
+}
+__setup("rodata=", set_debug_rodata);
+
+static void mark_readonly(void)
+{
+       if (rodata_enabled)
+               mark_rodata_ro();
+       else
+               pr_info("Kernel memory protection disabled.\n");
+}
+#else
+static inline void mark_readonly(void)
+{
+       pr_warn("This architecture does not have kernel memory protection.\n");
+}
+#endif
+
 static int __ref kernel_init(void *unused)
 {
        int ret;
@@ -937,7 +951,7 @@ static int __ref kernel_init(void *unused)
        /* need to finish all async __init code before freeing the memory */
        async_synchronize_full();
        free_initmem();
-       mark_rodata_ro();
+       mark_readonly();
        system_state = SYSTEM_RUNNING;
        numa_default_policy();
 
index e1dbf4a2c69e4ca9721c22184cb9f800325b9194..90ff129c88a27c50e33be234be695650e7210494 100644 (file)
@@ -153,13 +153,11 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
        } else {
                kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
                           __func__, bp->bp_addr);
-#ifdef CONFIG_DEBUG_RODATA
                if (!bp->bp_type) {
                        kdb_printf("Software breakpoints are unavailable.\n"
-                                  "  Change the kernel CONFIG_DEBUG_RODATA=n\n"
+                                  "  Boot the kernel with rodata=off\n"
                                   "  OR use hw breaks: help bph\n");
                }
-#endif
                return 1;
        }
        return 0;
index 614614821f00a02928439b068903904d81233808..b7231498de47f79787e0e46b95cf99fb93737b0a 100644 (file)
@@ -6785,7 +6785,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
        kfree_rcu(hlist, rcu_head);
 }
 
-static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+static void swevent_hlist_put_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -6797,15 +6797,15 @@ static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-static void swevent_hlist_put(struct perf_event *event)
+static void swevent_hlist_put(void)
 {
        int cpu;
 
        for_each_possible_cpu(cpu)
-               swevent_hlist_put_cpu(event, cpu);
+               swevent_hlist_put_cpu(cpu);
 }
 
-static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+static int swevent_hlist_get_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
        int err = 0;
@@ -6828,14 +6828,13 @@ exit:
        return err;
 }
 
-static int swevent_hlist_get(struct perf_event *event)
+static int swevent_hlist_get(void)
 {
-       int err;
-       int cpu, failed_cpu;
+       int err, cpu, failed_cpu;
 
        get_online_cpus();
        for_each_possible_cpu(cpu) {
-               err = swevent_hlist_get_cpu(event, cpu);
+               err = swevent_hlist_get_cpu(cpu);
                if (err) {
                        failed_cpu = cpu;
                        goto fail;
@@ -6848,7 +6847,7 @@ fail:
        for_each_possible_cpu(cpu) {
                if (cpu == failed_cpu)
                        break;
-               swevent_hlist_put_cpu(event, cpu);
+               swevent_hlist_put_cpu(cpu);
        }
 
        put_online_cpus();
@@ -6864,7 +6863,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
        WARN_ON(event->parent);
 
        static_key_slow_dec(&perf_swevent_enabled[event_id]);
-       swevent_hlist_put(event);
+       swevent_hlist_put();
 }
 
 static int perf_swevent_init(struct perf_event *event)
@@ -6895,7 +6894,7 @@ static int perf_swevent_init(struct perf_event *event)
        if (!event->parent) {
                int err;
 
-               err = swevent_hlist_get(event);
+               err = swevent_hlist_get();
                if (err)
                        return err;
 
@@ -8001,6 +8000,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                }
        }
 
+       /* symmetric to unaccount_event() in _free_event() */
+       account_event(event);
+
        return event;
 
 err_per_task:
@@ -8364,8 +8366,6 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
-       account_event(event);
-
        /*
         * Special case software events and allow them to be part of
         * any hardware group.
@@ -8662,8 +8662,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        /* Mark owner so we could distinguish it from user events. */
        event->owner = TASK_TOMBSTONE;
 
-       account_event(event);
-
        ctx = find_get_context(event->pmu, task, event);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
@@ -9447,6 +9445,7 @@ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(perf_event_sysfs_show);
 
 static int __init perf_event_sysfs_init(void)
 {
index 5d6ce6413ef1d227b32c99a4330bee1289f9f571..a5d2e74c89e0b217df98326e5febf3caf687687c 100644 (file)
  *   futex_wait(futex, val);
  *
  *   waiters++; (a)
- *   mb(); (A) <-- paired with -.
- *                              |
- *   lock(hash_bucket(futex));  |
- *                              |
- *   uval = *futex;             |
- *                              |        *futex = newval;
- *                              |        sys_futex(WAKE, futex);
- *                              |          futex_wake(futex);
- *                              |
- *                              `------->  mb(); (B)
+ *   smp_mb(); (A) <-- paired with -.
+ *                                  |
+ *   lock(hash_bucket(futex));      |
+ *                                  |
+ *   uval = *futex;                 |
+ *                                  |        *futex = newval;
+ *                                  |        sys_futex(WAKE, futex);
+ *                                  |          futex_wake(futex);
+ *                                  |
+ *                                  `--------> smp_mb(); (B)
  *   if (uval == val)
  *     queue();
  *     unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@ static inline void futex_get_mm(union futex_key *key)
        /*
         * Ensure futex_get_mm() implies a full barrier such that
         * get_futex_key() implies a full barrier. This is relied upon
-        * as full barrier (B), see the ordering comment above.
+        * as smp_mb(); (B), see the ordering comment above.
         */
        smp_mb__after_atomic();
 }
@@ -407,10 +407,10 @@ static void get_futex_key_refs(union futex_key *key)
 
        switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
        case FUT_OFF_INODE:
-               ihold(key->shared.inode); /* implies MB (B) */
+               ihold(key->shared.inode); /* implies smp_mb(); (B) */
                break;
        case FUT_OFF_MMSHARED:
-               futex_get_mm(key); /* implies MB (B) */
+               futex_get_mm(key); /* implies smp_mb(); (B) */
                break;
        default:
                /*
@@ -418,7 +418,7 @@ static void get_futex_key_refs(union futex_key *key)
                 * mm, therefore the only purpose of calling get_futex_key_refs
                 * is because we need the barrier for the lockless waiter check.
                 */
-               smp_mb(); /* explicit MB (B) */
+               smp_mb(); /* explicit smp_mb(); (B) */
        }
 }
 
@@ -497,7 +497,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
        if (!fshared) {
                key->private.mm = mm;
                key->private.address = address;
-               get_futex_key_refs(key);  /* implies MB (B) */
+               get_futex_key_refs(key);  /* implies smp_mb(); (B) */
                return 0;
        }
 
@@ -520,7 +520,20 @@ again:
        else
                err = 0;
 
-       lock_page(page);
+       /*
+        * The treatment of mapping from this point on is critical. The page
+        * lock protects many things but in this context the page lock
+        * stabilizes mapping, prevents inode freeing in the shared
+        * file-backed region case and guards against movement to swap cache.
+        *
+        * Strictly speaking the page lock is not needed in all cases being
+        * considered here and the page lock forces unnecessary serialization.
+        * From this point on, mapping will be re-verified if necessary and
+        * the page lock will be acquired only if it is unavoidable.
+        */
+       page = compound_head(page);
+       mapping = READ_ONCE(page->mapping);
+
        /*
         * If page->mapping is NULL, then it cannot be a PageAnon
         * page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@ again:
         * shmem_writepage move it from filecache to swapcache beneath us:
         * an unlikely race, but we do need to retry for page->mapping.
         */
-       mapping = compound_head(page)->mapping;
-       if (!mapping) {
-               int shmem_swizzled = PageSwapCache(page);
+       if (unlikely(!mapping)) {
+               int shmem_swizzled;
+
+               /*
+                * Page lock is required to identify which special case above
+                * applies. If this is really a shmem page then the page lock
+                * will prevent unexpected transitions.
+                */
+               lock_page(page);
+               shmem_swizzled = PageSwapCache(page) || page->mapping;
                unlock_page(page);
                put_page(page);
+
                if (shmem_swizzled)
                        goto again;
+
                return -EFAULT;
        }
 
        /*
         * Private mappings are handled in a simple way.
         *
+        * If the futex key is stored on an anonymous page, then the associated
+        * object is the mm which is implicitly pinned by the calling process.
+        *
         * NOTE: When userspace waits on a MAP_SHARED mapping, even if
         * it's a read-only handle, it's expected that futexes attach to
         * the object not the particular process.
@@ -566,16 +591,74 @@ again:
                key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
                key->private.mm = mm;
                key->private.address = address;
+
+               get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
        } else {
+               struct inode *inode;
+
+               /*
+                * The associated futex object in this case is the inode and
+                * the page->mapping must be traversed. Ordinarily this should
+                * be stabilised under page lock but it's not strictly
+                * necessary in this case as we just want to pin the inode, not
+                * update the radix tree or anything like that.
+                *
+                * The RCU read lock is taken as the inode is finally freed
+                * under RCU. If the mapping still matches expectations then the
+                * mapping->host can be safely accessed as being a valid inode.
+                */
+               rcu_read_lock();
+
+               if (READ_ONCE(page->mapping) != mapping) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               inode = READ_ONCE(mapping->host);
+               if (!inode) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               /*
+                * Take a reference unless it is about to be freed. Previously
+                * this reference was taken by ihold under the page lock
+                * pinning the inode in place so i_lock was unnecessary. The
+                * only way for this check to fail is if the inode was
+                * truncated in parallel so warn for now if this happens.
+                *
+                * We are not calling into get_futex_key_refs() in file-backed
+                * cases, therefore a successful atomic_inc return below will
+                * guarantee that get_futex_key() will still imply smp_mb(); (B).
+                */
+               if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
+                       rcu_read_unlock();
+                       put_page(page);
+
+                       goto again;
+               }
+
+               /* Should be impossible but let's be paranoid for now */
+               if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
+                       err = -EFAULT;
+                       rcu_read_unlock();
+                       iput(inode);
+
+                       goto out;
+               }
+
                key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-               key->shared.inode = mapping->host;
+               key->shared.inode = inode;
                key->shared.pgoff = basepage_index(page);
+               rcu_read_unlock();
        }
 
-       get_futex_key_refs(key); /* implies MB (B) */
-
 out:
-       unlock_page(page);
        put_page(page);
        return err;
 }
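
The file-backed branch added above replaces the page-lock-protected ihold() with a speculative sequence: re-read page->mapping, take an inode reference only if i_count is still non-zero, and retry the lookup otherwise. A rough user-space sketch of the "take a reference unless it is about to be freed" step, using C11 atomics and invented names (struct obj, try_get_ref):

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
        atomic_int refcount;            /* 0 means teardown is in progress */
};

/* Analogue of atomic_inc_not_zero(): only succeed while refcount > 0. */
static bool try_get_ref(struct obj *o)
{
        int old = atomic_load(&o->refcount);

        while (old > 0) {
                if (atomic_compare_exchange_weak(&o->refcount, &old, old + 1))
                        return true;    /* reference taken */
        }
        return false;                   /* caller must redo the lookup */
}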
@@ -1864,7 +1947,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 
        q->lock_ptr = &hb->lock;
 
-       spin_lock(&hb->lock); /* implies MB (A) */
+       spin_lock(&hb->lock); /* implies smp_mb(); (A) */
        return hb;
 }
 
@@ -1927,8 +2010,12 @@ static int unqueue_me(struct futex_q *q)
 
        /* In the common case we don't take the spinlock, which is nice. */
 retry:
-       lock_ptr = q->lock_ptr;
-       barrier();
+       /*
+        * q->lock_ptr can change between this read and the following spin_lock.
+        * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
+        * optimizing lock_ptr out of the logic below.
+        */
+       lock_ptr = READ_ONCE(q->lock_ptr);
        if (lock_ptr != NULL) {
                spin_lock(lock_ptr);
                /*
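
The unqueue_me() change swaps a plain load plus barrier() for READ_ONCE(), which stops the compiler from reloading q->lock_ptr between the NULL check and the spin_lock() call. A hedged, user-space rendition of the idea (this is not the kernel's actual macro, which handles more cases):

/* Force a single, non-reloadable read of x through a volatile access. */
#define READ_ONCE(x)    (*(const volatile __typeof__(x) *)&(x))

struct futex_q_like {
        int *lock_ptr;
};

static void take_lock_once(struct futex_q_like *q)
{
        int *lock_ptr = READ_ONCE(q->lock_ptr); /* snapshot exactly once */

        if (lock_ptr != NULL) {
                /* ... lock through the snapshotted pointer, then re-check ... */
        }
}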
index 8dc65914486999f43caa9276dde914663919fbc6..8d34308ea449ad31bae472f0403c5f1b25324683 100644 (file)
@@ -66,13 +66,15 @@ struct resource crashk_res = {
        .name  = "Crash kernel",
        .start = 0,
        .end   = 0,
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+       .desc  = IORES_DESC_CRASH_KERNEL
 };
 struct resource crashk_low_res = {
        .name  = "Crash kernel",
        .start = 0,
        .end   = 0,
-       .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+       .desc  = IORES_DESC_CRASH_KERNEL
 };
 
 int kexec_should_crash(struct task_struct *p)
@@ -959,7 +961,7 @@ int crash_shrink_memory(unsigned long new_size)
 
        ram_res->start = end;
        ram_res->end = crashk_res.end;
-       ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+       ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
        ram_res->name = "System RAM";
 
        crashk_res.end = end - 1;
index 007b791f676d5605fb674b6db333d9dd1d0a23b4..56b18eb1f0013c9bc7e041a3dd2a8ecaeb580b95 100644 (file)
@@ -524,10 +524,10 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
 
        /* Walk the RAM ranges and allocate a suitable range for the buffer */
        if (image->type == KEXEC_TYPE_CRASH)
-               ret = walk_iomem_res("Crash kernel",
-                                    IORESOURCE_MEM | IORESOURCE_BUSY,
-                                    crashk_res.start, crashk_res.end, kbuf,
-                                    locate_mem_hole_callback);
+               ret = walk_iomem_res_desc(crashk_res.desc,
+                               IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+                               crashk_res.start, crashk_res.end, kbuf,
+                               locate_mem_hole_callback);
        else
                ret = walk_system_ram_res(0, -1, kbuf,
                                          locate_mem_hole_callback);
index 716547fdb8731dcc395d70f2664ec6fb22378db6..f894a2cd9b2aa3ad37adfad0d1c15f46fdf93885 100644 (file)
@@ -123,8 +123,6 @@ static inline int debug_locks_off_graph_unlock(void)
        return ret;
 }
 
-static int lockdep_initialized;
-
 unsigned long nr_list_entries;
 static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 
@@ -433,19 +431,6 @@ unsigned int nr_process_chains;
 unsigned int max_lockdep_depth;
 
 #ifdef CONFIG_DEBUG_LOCKDEP
-/*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
-       .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
-       .entries = lockdep_init_trace_data,
-};
-
 /*
  * Various lockdep statistics:
  */
@@ -669,20 +654,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
        struct hlist_head *hash_head;
        struct lock_class *class;
 
-#ifdef CONFIG_DEBUG_LOCKDEP
-       /*
-        * If the architecture calls into lockdep before initializing
-        * the hashes then we'll warn about it later. (we cannot printk
-        * right now)
-        */
-       if (unlikely(!lockdep_initialized)) {
-               lockdep_init();
-               lockdep_init_error = 1;
-               lock_init_error = lock->name;
-               save_stack_trace(&lockdep_init_trace);
-       }
-#endif
-
        if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
                debug_locks_off();
                printk(KERN_ERR
@@ -2010,6 +1981,53 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
        return lock_classes + chain_hlocks[chain->base + i];
 }
 
+/*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+                                       struct held_lock *hlock)
+{
+       int i;
+       struct held_lock *hlock_curr;
+
+       for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+               hlock_curr = curr->held_locks + i;
+               if (hlock_curr->irq_context != hlock->irq_context)
+                       break;
+
+       }
+
+       return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+                       struct held_lock *hlock,
+                       struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+       int i, j, id;
+
+       i = get_first_held_lock(curr, hlock);
+
+       if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+               return 0;
+
+       for (j = 0; j < chain->depth - 1; j++, i++) {
+               id = curr->held_locks[i].class_idx - 1;
+
+               if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+                       return 0;
+       }
+#endif
+       return 1;
+}
+
 /*
  * Look up a dependency chain. If the key is not present yet then
  * add it and return 1 - in this case the new dependency chain is
@@ -2023,7 +2041,6 @@ static inline int lookup_chain_cache(struct task_struct *curr,
        struct lock_class *class = hlock_class(hlock);
        struct hlist_head *hash_head = chainhashentry(chain_key);
        struct lock_chain *chain;
-       struct held_lock *hlock_curr;
        int i, j;
 
        /*
@@ -2041,6 +2058,9 @@ static inline int lookup_chain_cache(struct task_struct *curr,
                if (chain->chain_key == chain_key) {
 cache_hit:
                        debug_atomic_inc(chain_lookup_hits);
+                       if (!check_no_collision(curr, hlock, chain))
+                               return 0;
+
                        if (very_verbose(class))
                                printk("\nhash chain already cached, key: "
                                        "%016Lx tail class: [%p] %s\n",
@@ -2078,13 +2098,7 @@ cache_hit:
        chain = lock_chains + nr_lock_chains++;
        chain->chain_key = chain_key;
        chain->irq_context = hlock->irq_context;
-       /* Find the first held_lock of current chain */
-       for (i = curr->lockdep_depth - 1; i >= 0; i--) {
-               hlock_curr = curr->held_locks + i;
-               if (hlock_curr->irq_context != hlock->irq_context)
-                       break;
-       }
-       i++;
+       i = get_first_held_lock(curr, hlock);
        chain->depth = curr->lockdep_depth + 1 - i;
        if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
                chain->base = nr_chain_hlocks;
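
check_no_collision() above defends the chain cache against 64-bit chain_key collisions by comparing the cached class indices with the locks actually held. The generic pattern, verifying the cached payload on a hash hit rather than trusting the key alone, can be sketched in plain C with invented types:

#include <stdbool.h>

struct cached_chain {
        unsigned long long key;         /* hash of the id sequence */
        int depth;
        const int *ids;                 /* ids that produced the hash */
};

/* Equal hashes do not imply equal input, so compare the contents too. */
static bool chain_matches(const struct cached_chain *c,
                          const int *cur_ids, int cur_depth)
{
        int i;

        if (c->depth != cur_depth)
                return false;

        for (i = 0; i < cur_depth; i++) {
                if (c->ids[i] != cur_ids[i])
                        return false;
        }
        return true;
}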
@@ -2172,7 +2186,7 @@ static void check_chain_key(struct task_struct *curr)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
        struct held_lock *hlock, *prev_hlock = NULL;
-       unsigned int i, id;
+       unsigned int i;
        u64 chain_key = 0;
 
        for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2189,17 +2203,16 @@ static void check_chain_key(struct task_struct *curr)
                                (unsigned long long)hlock->prev_chain_key);
                        return;
                }
-               id = hlock->class_idx - 1;
                /*
                 * Whoops ran out of static storage again?
                 */
-               if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+               if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
                        return;
 
                if (prev_hlock && (prev_hlock->irq_context !=
                                                        hlock->irq_context))
                        chain_key = 0;
-               chain_key = iterate_chain_key(chain_key, id);
+               chain_key = iterate_chain_key(chain_key, hlock->class_idx);
                prev_hlock = hlock;
        }
        if (chain_key != curr->curr_chain_key) {
@@ -3077,7 +3090,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
        struct task_struct *curr = current;
        struct lock_class *class = NULL;
        struct held_lock *hlock;
-       unsigned int depth, id;
+       unsigned int depth;
        int chain_head = 0;
        int class_idx;
        u64 chain_key;
@@ -3180,11 +3193,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
         * The 'key ID' is what is the most compact key value to drive
         * the hash, not class->key.
         */
-       id = class - lock_classes;
        /*
         * Whoops, we did it again.. ran straight out of our static allocation.
         */
-       if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+       if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
                return 0;
 
        chain_key = curr->curr_chain_key;
@@ -3202,7 +3214,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
                chain_key = 0;
                chain_head = 1;
        }
-       chain_key = iterate_chain_key(chain_key, id);
+       chain_key = iterate_chain_key(chain_key, class_idx);
 
        if (nest_lock && !__lock_is_held(nest_lock))
                return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -4013,28 +4025,6 @@ out_restore:
        raw_local_irq_restore(flags);
 }
 
-void lockdep_init(void)
-{
-       int i;
-
-       /*
-        * Some architectures have their own start_kernel()
-        * code which calls lockdep_init(), while we also
-        * call lockdep_init() from the start_kernel() itself,
-        * and we want to initialize the hashes only once:
-        */
-       if (lockdep_initialized)
-               return;
-
-       for (i = 0; i < CLASSHASH_SIZE; i++)
-               INIT_HLIST_HEAD(classhash_table + i);
-
-       for (i = 0; i < CHAINHASH_SIZE; i++)
-               INIT_HLIST_HEAD(chainhash_table + i);
-
-       lockdep_initialized = 1;
-}
-
 void __init lockdep_info(void)
 {
        printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4051,6 @@ void __init lockdep_info(void)
 
        printk(" per task-struct memory footprint: %lu bytes\n",
                sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
-       if (lockdep_init_error) {
-               printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
-               printk("Call stack leading to lockdep invocation was:\n");
-               print_stack_trace(&lockdep_init_trace, 0);
-       }
-#endif
 }
 
 static void
index 5b9102a47ea5209dd887b6dfbca16215b2bad922..c835270f0c2f93c24f623e9ea2ba2e94eebb006a 100644 (file)
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
        node->locked = 0;
        node->next   = NULL;
 
-       prev = xchg_acquire(lock, node);
+       /*
+        * We rely on the full barrier with global transitivity implied by the
+        * below xchg() to order the initialization stores above against any
+        * observation of @node. And to provide the ACQUIRE ordering associated
+        * with a LOCK primitive.
+        */
+       prev = xchg(lock, node);
        if (likely(prev == NULL)) {
                /*
                 * Lock acquired, don't need to set node->locked to 1. Threads
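
The mcs_spin_lock() hunk replaces xchg_acquire() with a full xchg() so that the node->locked/node->next initialization is ordered before the node becomes reachable through *lock. In C11 terms the same publication step could be sketched like this (names invented; readers are assumed to load the tail pointer with acquire semantics):

#include <stdatomic.h>
#include <stddef.h>

struct node {
        _Atomic(struct node *) next;
        atomic_int             locked;
};

static struct node *publish(_Atomic(struct node *) *tail, struct node *me)
{
        atomic_store_explicit(&me->locked, 0, memory_order_relaxed);
        atomic_store_explicit(&me->next, NULL, memory_order_relaxed);

        /*
         * The seq_cst exchange orders the initialization above before the
         * point where another thread can observe 'me' via *tail, and its
         * return value is read with acquire semantics for the hand-off.
         */
        return atomic_exchange_explicit(tail, me, memory_order_seq_cst);
}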
index 0551c219c40e5bb8f8f87bfb0445100fe1b31cc9..e364b424b019ff51de0bde17fe2611444b5c1efb 100644 (file)
@@ -716,6 +716,7 @@ static inline void
 __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
 {
        unsigned long flags;
+       WAKE_Q(wake_q);
 
        /*
         * As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
                                           struct mutex_waiter, list);
 
                debug_mutex_wake_waiter(lock, waiter);
-
-               wake_up_process(waiter->task);
+               wake_q_add(&wake_q, waiter->task);
        }
 
        spin_unlock_mutex(&lock->wait_lock, flags);
+       wake_up_q(&wake_q);
 }
 
 /*
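
The mutex slowpath now collects the waiter on a wake_q under wait_lock and issues the actual wakeup only after the spinlock is dropped, so the woken task cannot immediately contend on it. The shape of that pattern, as an abbreviated kernel-style sketch (waiter stands for whichever wait-list entry the unlock path picks):

WAKE_Q(wake_q);
unsigned long flags;

spin_lock_irqsave(&lock->wait_lock, flags);
/* ... choose the waiter(s) to wake ... */
wake_q_add(&wake_q, waiter->task);              /* defer the wakeup */
spin_unlock_irqrestore(&lock->wait_lock, flags);

wake_up_q(&wake_q);                             /* wake outside the lock */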
index 393d1874b9e0c81d44ef9d0447ded22a03bccd81..ce2f75e32ae155b30cfe324c56f9bc526771b66a 100644 (file)
@@ -358,8 +358,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         * sequentiality; this is because not all clear_pending_set_locked()
         * implementations imply full barriers.
         */
-       while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
-               cpu_relax();
+       smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
 
        /*
         * take ownership and clear the pending bit.
@@ -435,7 +434,7 @@ queue:
         *
         * The PV pv_wait_head_or_lock function, if active, will acquire
         * the lock and return a non-zero value. So we have to skip the
-        * smp_load_acquire() call. As the next PV queue head hasn't been
+        * smp_cond_acquire() call. As the next PV queue head hasn't been
         * designated yet, there is no way for the locked value to become
         * _Q_SLOW_VAL. So both the set_locked() and the
         * atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@ locked:
                        break;
                }
                /*
-                * The smp_load_acquire() call above has provided the necessary
+                * The smp_cond_acquire() call above has provided the necessary
                 * acquire semantics required for locking. At most two
                 * iterations of this loop may be ran.
                 */
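
This hunk, and the kernel/smp.c change later in the diff, replace open-coded smp_load_acquire()/cpu_relax() loops with smp_cond_acquire(cond), which spins until the condition holds and then provides acquire ordering. A rough user-space rendition using a C11 fence (not the kernel's implementation; it assumes the condition reads an _Atomic or volatile variable so it is re-evaluated each iteration):

#include <stdatomic.h>

/* Spin until cond is true, then order subsequent accesses after it. */
#define smp_cond_acquire(cond)                                  \
        do {                                                    \
                while (!(cond))                                 \
                        ;       /* cpu_relax() in the kernel */ \
                atomic_thread_fence(memory_order_acquire);      \
        } while (0)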
index 87bb235c3448054d63923d0f9549cbc7718a49ca..21ede57f68b320594f874e4ff0a1b60974fc67f0 100644 (file)
@@ -54,6 +54,11 @@ struct pv_node {
        u8                      state;
 };
 
+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
 /*
  * By replacing the regular queued_spin_trylock() with the function below,
  * it will be called once when a lock waiter enter the PV slowpath before
@@ -65,9 +70,11 @@ struct pv_node {
 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 {
        struct __qspinlock *l = (void *)lock;
+       int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+                  (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
 
-       return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-               (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+       qstat_inc(qstat_pv_lock_stealing, ret);
+       return ret;
 }
 
 /*
@@ -137,11 +144,6 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 }
 #endif /* _Q_PENDING_BITS == 8 */
 
-/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
 /*
  * Lock and MCS node addresses hash table for fast lookup
  *
@@ -398,6 +400,11 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
        if (READ_ONCE(pn->state) == vcpu_hashed)
                lp = (struct qspinlock **)1;
 
+       /*
+        * Tracking # of slowpath locking operations
+        */
+       qstat_inc(qstat_pv_lock_slowpath, true);
+
        for (;; waitcnt++) {
                /*
                 * Set correct vCPU state to be used by queue node wait-early
index 640dcecdd1df7a5ac5c8167fbd9e70e71b2e8f21..eb2a2c9bc3fc15d181c9e5981648974dc7aec65c 100644 (file)
@@ -22,6 +22,7 @@
  *   pv_kick_wake      - # of vCPU kicks used for computing pv_latency_wake
  *   pv_latency_kick   - average latency (ns) of vCPU kick operation
  *   pv_latency_wake   - average latency (ns) from vCPU kick to wakeup
+ *   pv_lock_slowpath  - # of locking operations via the slowpath
  *   pv_lock_stealing  - # of lock stealing operations
  *   pv_spurious_wakeup        - # of spurious wakeups
  *   pv_wait_again     - # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@ enum qlock_stats {
        qstat_pv_kick_wake,
        qstat_pv_latency_kick,
        qstat_pv_latency_wake,
+       qstat_pv_lock_slowpath,
        qstat_pv_lock_stealing,
        qstat_pv_spurious_wakeup,
        qstat_pv_wait_again,
@@ -70,6 +72,7 @@ static const char * const qstat_names[qstat_num + 1] = {
        [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
        [qstat_pv_latency_kick]    = "pv_latency_kick",
        [qstat_pv_latency_wake]    = "pv_latency_wake",
+       [qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
        [qstat_pv_lock_stealing]   = "pv_lock_stealing",
        [qstat_pv_wait_again]      = "pv_wait_again",
        [qstat_pv_wait_early]      = "pv_wait_early",
@@ -279,19 +282,6 @@ static inline void __pv_wait(u8 *ptr, u8 val)
 #define pv_kick(c)     __pv_kick(c)
 #define pv_wait(p, v)  __pv_wait(p, v)
 
-/*
- * PV unfair trylock count tracking function
- */
-static inline int qstat_spin_steal_lock(struct qspinlock *lock)
-{
-       int ret = pv_queued_spin_steal_lock(lock);
-
-       qstat_inc(qstat_pv_lock_stealing, ret);
-       return ret;
-}
-#undef  queued_spin_trylock
-#define queued_spin_trylock(l) qstat_spin_steal_lock(l)
-
 #else /* CONFIG_QUEUED_LOCK_STAT */
 
 static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
index b981a7b023f04c356df5b852f60caeae232fdf45..fb9b88787ebc269fa39a3c08a19c8d0940245bbe 100644 (file)
@@ -29,10 +29,10 @@ __weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
 
 static void *try_ram_remap(resource_size_t offset, size_t size)
 {
-       struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
+       unsigned long pfn = PHYS_PFN(offset);
 
        /* In the simple case just return the existing linear address */
-       if (!PageHighMem(page))
+       if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
                return __va(offset);
        return NULL; /* fallback to ioremap_cache */
 }
@@ -47,7 +47,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
  * being mapped does not have i/o side effects and the __iomem
  * annotation is not applicable.
  *
- * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * MEMREMAP_WB - matches the default mapping for System RAM on
  * the architecture.  This is usually a read-allocate write-back cache.
  * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
  * memremap() will bypass establishing a new mapping and instead return
@@ -56,11 +56,12 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
  * MEMREMAP_WT - establish a mapping whereby writes either bypass the
  * cache or are written through to memory and never exist in a
  * cache-dirty state with respect to program visibility.  Attempts to
- * map "System RAM" with this mapping type will fail.
+ * map System RAM with this mapping type will fail.
  */
 void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 {
-       int is_ram = region_intersects(offset, size, "System RAM");
+       int is_ram = region_intersects(offset, size,
+                                      IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
        void *addr = NULL;
 
        if (is_ram == REGION_MIXED) {
@@ -76,7 +77,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
                 * MEMREMAP_WB is special in that it can be satisifed
                 * from the direct map.  Some archs depend on the
                 * capability of memremap() to autodetect cases where
-                * the requested range is potentially in "System RAM"
+                * the requested range is potentially in System RAM.
                 */
                if (is_ram == REGION_INTERSECTS)
                        addr = try_ram_remap(offset, size);
@@ -88,7 +89,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
         * If we don't have a mapping yet and more request flags are
         * pending then we will be attempting to establish a new virtual
         * address mapping.  Enforce that this mapping is not aliasing
-        * "System RAM"
+        * System RAM.
         */
        if (!addr && is_ram == REGION_INTERSECTS && flags) {
                WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
@@ -270,13 +271,17 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
 void *devm_memremap_pages(struct device *dev, struct resource *res,
                struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
-       int is_ram = region_intersects(res->start, resource_size(res),
-                       "System RAM");
        resource_size_t key, align_start, align_size, align_end;
        struct dev_pagemap *pgmap;
        struct page_map *page_map;
+       int error, nid, is_ram;
        unsigned long pfn;
-       int error, nid;
+
+       align_start = res->start & ~(SECTION_SIZE - 1);
+       align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+               - align_start;
+       is_ram = region_intersects(align_start, align_size,
+               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 
        if (is_ram == REGION_MIXED) {
                WARN_ONCE(1, "%s attempted on mixed region %pr\n",
@@ -314,8 +319,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
        mutex_lock(&pgmap_lock);
        error = 0;
-       align_start = res->start & ~(SECTION_SIZE - 1);
-       align_size = ALIGN(resource_size(res), SECTION_SIZE);
        align_end = align_start + align_size - 1;
        for (key = align_start; key <= align_end; key += SECTION_SIZE) {
                struct dev_pagemap *dup;
@@ -351,8 +354,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
        for_each_device_pfn(pfn, page_map) {
                struct page *page = pfn_to_page(pfn);
 
-               /* ZONE_DEVICE pages must never appear on a slab lru */
-               list_force_poison(&page->lru);
+               /*
+                * ZONE_DEVICE pages union ->lru with a ->pgmap back
+                * pointer.  It is a bug if a ZONE_DEVICE page is ever
+                * freed or placed on a driver-private list.  Seed the
+                * storage with LIST_POISON* values.
+                */
+               list_del(&page->lru);
                page->pgmap = pgmap;
        }
        devres_add(dev, page_map);
index 3669d1bfc4254213e0e42dacab374624d74cdd5c..4d466052426b3e993e7e3c3550e948308e377c51 100644 (file)
@@ -333,13 +333,13 @@ int release_resource(struct resource *old)
 EXPORT_SYMBOL(release_resource);
 
 /*
- * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
- * the caller must specify res->start, res->end, res->flags and "name".
- * If found, returns 0, res is overwritten, if not found, returns -1.
- * This walks through whole tree and not just first level children
- * until and unless first_level_children_only is true.
+ * Finds the lowest iomem resource existing within [res->start..res->end).
+ * The caller must specify res->start, res->end, res->flags, and optionally
+ * desc.  If found, returns 0, res is overwritten, if not found, returns -1.
+ * This function walks the whole tree, and not just the first-level children,
+ * unless first_level_children_only is true.
  */
-static int find_next_iomem_res(struct resource *res, char *name,
+static int find_next_iomem_res(struct resource *res, unsigned long desc,
                               bool first_level_children_only)
 {
        resource_size_t start, end;
@@ -358,9 +358,9 @@ static int find_next_iomem_res(struct resource *res, char *name,
        read_lock(&resource_lock);
 
        for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
-               if (p->flags != res->flags)
+               if ((p->flags & res->flags) != res->flags)
                        continue;
-               if (name && strcmp(p->name, name))
+               if ((desc != IORES_DESC_NONE) && (desc != p->desc))
                        continue;
                if (p->start > end) {
                        p = NULL;
@@ -385,15 +385,18 @@ static int find_next_iomem_res(struct resource *res, char *name,
  * Walks through iomem resources and calls func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
- * name are valid candidates.
+ * desc are valid candidates.
  *
- * @name: name of resource
- * @flags: resource flags
+ * @desc: I/O resource descriptor. Use IORES_DESC_NONE to skip @desc check.
+ * @flags: I/O resource flags
  * @start: start addr
  * @end: end addr
+ *
+ * NOTE: For a new descriptor search, define a new IORES_DESC in
+ * <linux/ioport.h> and set it in 'desc' of a target resource entry.
  */
-int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
-               void *arg, int (*func)(u64, u64, void *))
+int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
+               u64 end, void *arg, int (*func)(u64, u64, void *))
 {
        struct resource res;
        u64 orig_end;
@@ -403,23 +406,27 @@ int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
        res.end = end;
        res.flags = flags;
        orig_end = res.end;
+
        while ((res.start < res.end) &&
-               (!find_next_iomem_res(&res, name, false))) {
+               (!find_next_iomem_res(&res, desc, false))) {
+
                ret = (*func)(res.start, res.end, arg);
                if (ret)
                        break;
+
                res.start = res.end + 1;
                res.end = orig_end;
        }
+
        return ret;
 }
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM". This function deals with
- * full ranges and not pfn. If resources are not pfn aligned, dealing
- * with pfn can truncate ranges.
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
+ * Now, this function is only for System RAM; it deals with full ranges and
+ * not PFNs. If resources are not PFN-aligned, dealing with PFNs can truncate
+ * ranges.
  */
 int walk_system_ram_res(u64 start, u64 end, void *arg,
                                int (*func)(u64, u64, void *))
@@ -430,10 +437,10 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
 
        res.start = start;
        res.end = end;
-       res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        orig_end = res.end;
        while ((res.start < res.end) &&
-               (!find_next_iomem_res(&res, "System RAM", true))) {
+               (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
                ret = (*func)(res.start, res.end, arg);
                if (ret)
                        break;
@@ -446,9 +453,9 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
 #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 
 /*
- * This function calls callback against all memory range of "System RAM"
- * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
- * Now, this function is only for "System RAM".
+ * This function calls the @func callback against all memory ranges of type
+ * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
+ * It is to be used only for System RAM.
  */
 int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
                void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,10 +467,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 
        res.start = (u64) start_pfn << PAGE_SHIFT;
        res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
-       res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        orig_end = res.end;
        while ((res.start < res.end) &&
-               (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
+               (find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) {
                pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
                end_pfn = (res.end + 1) >> PAGE_SHIFT;
                if (end_pfn > pfn)
@@ -484,7 +491,7 @@ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
 }
 /*
  * This generic page_is_ram() returns true if specified address is
- * registered as "System RAM" in iomem_resource list.
+ * registered as System RAM in iomem_resource list.
  */
 int __weak page_is_ram(unsigned long pfn)
 {
@@ -496,30 +503,34 @@ EXPORT_SYMBOL_GPL(page_is_ram);
  * region_intersects() - determine intersection of region with known resources
  * @start: region start address
  * @size: size of region
- * @name: name of resource (in iomem_resource)
+ * @flags: flags of resource (in iomem_resource)
+ * @desc: descriptor of resource (in iomem_resource) or IORES_DESC_NONE
  *
  * Check if the specified region partially overlaps or fully eclipses a
- * resource identified by @name.  Return REGION_DISJOINT if the region
- * does not overlap @name, return REGION_MIXED if the region overlaps
- * @type and another resource, and return REGION_INTERSECTS if the
- * region overlaps @type and no other defined resource. Note, that
- * REGION_INTERSECTS is also returned in the case when the specified
- * region overlaps RAM and undefined memory holes.
+ * resource identified by @flags and @desc (optional with IORES_DESC_NONE).
+ * Return REGION_DISJOINT if the region does not overlap @flags/@desc,
+ * return REGION_MIXED if the region overlaps @flags/@desc and another
+ * resource, and return REGION_INTERSECTS if the region overlaps @flags/@desc
+ * and no other defined resource. Note that REGION_INTERSECTS is also
+ * returned in the case when the specified region overlaps RAM and undefined
+ * memory holes.
  *
  * region_intersect() is used by memory remapping functions to ensure
  * the user is not remapping RAM and is a vast speed up over walking
  * through the resource table page by page.
  */
-int region_intersects(resource_size_t start, size_t size, const char *name)
+int region_intersects(resource_size_t start, size_t size, unsigned long flags,
+                     unsigned long desc)
 {
-       unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
        resource_size_t end = start + size - 1;
        int type = 0; int other = 0;
        struct resource *p;
 
        read_lock(&resource_lock);
        for (p = iomem_resource.child; p ; p = p->sibling) {
-               bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+               bool is_type = (((p->flags & flags) == flags) &&
+                               ((desc == IORES_DESC_NONE) ||
+                                (desc == p->desc)));
 
                if (start >= p->start && start <= p->end)
                        is_type ? type++ : other++;
@@ -538,6 +549,7 @@ int region_intersects(resource_size_t start, size_t size, const char *name)
 
        return REGION_DISJOINT;
 }
+EXPORT_SYMBOL_GPL(region_intersects);
 
 void __weak arch_remove_reservations(struct resource *avail)
 {
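
With the rework above, callers pass resource flags plus an IORES_DESC_* descriptor instead of matching on the resource name; the memremap() hunk earlier in this diff is the reference user. A kernel-style sketch of a call site (error handling elided):

int is_ram = region_intersects(offset, size,
                               IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

if (is_ram == REGION_INTERSECTS) {
        /* the whole range is System RAM: the linear mapping can be used */
} else if (is_ram == REGION_MIXED) {
        /* partially RAM, partially not: refuse or fall back */
}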
@@ -948,6 +960,7 @@ static void __init __reserve_region_with_split(struct resource *root,
        res->start = start;
        res->end = end;
        res->flags = IORESOURCE_BUSY;
+       res->desc = IORES_DESC_NONE;
 
        while (1) {
 
@@ -982,6 +995,7 @@ static void __init __reserve_region_with_split(struct resource *root,
                                next_res->start = conflict->end + 1;
                                next_res->end = end;
                                next_res->flags = IORESOURCE_BUSY;
+                               next_res->desc = IORES_DESC_NONE;
                        }
                } else {
                        res->start = conflict->end + 1;
@@ -1071,8 +1085,9 @@ struct resource * __request_region(struct resource *parent,
        res->name = name;
        res->start = start;
        res->end = start + n - 1;
-       res->flags = resource_type(parent);
+       res->flags = resource_type(parent) | resource_ext_type(parent);
        res->flags |= IORESOURCE_BUSY | flags;
+       res->desc = IORES_DESC_NONE;
 
        write_lock(&resource_lock);
 
@@ -1238,6 +1253,7 @@ int release_mem_region_adjustable(struct resource *parent,
                        new_res->start = end + 1;
                        new_res->end = res->end;
                        new_res->flags = res->flags;
+                       new_res->desc = res->desc;
                        new_res->parent = res->parent;
                        new_res->sibling = res->sibling;
                        new_res->child = NULL;
@@ -1413,6 +1429,7 @@ static int __init reserve_setup(char *str)
                        res->start = io_start;
                        res->end = io_start + io_num - 1;
                        res->flags = IORESOURCE_BUSY;
+                       res->desc = IORES_DESC_NONE;
                        res->child = NULL;
                        if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
                                reserved = x+1;
index 9503d590e5ef5b81537947b9925ecfe689f72a91..41f6b2215aa86b2275347ea5e25ff9db59dd933f 100644 (file)
@@ -26,6 +26,7 @@
  *              Thomas Gleixner, Mike Kravetz
  */
 
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
@@ -5096,6 +5097,8 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
 
+       kasan_unpoison_task_stack(idle);
+
 #ifdef CONFIG_SMP
        /*
         * Its possible that init_idle() gets called multiple times on a task,
index d903c02223afbaa2776b2610f00ae3def7de442e..300d29391e07416d73177c752ecca4afee21d3dd 100644 (file)
@@ -105,13 +105,12 @@ void __init call_function_init(void)
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
-static void csd_lock_wait(struct call_single_data *csd)
+static __always_inline void csd_lock_wait(struct call_single_data *csd)
 {
-       while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
-               cpu_relax();
+       smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 }
 
-static void csd_lock(struct call_single_data *csd)
+static __always_inline void csd_lock(struct call_single_data *csd)
 {
        csd_lock_wait(csd);
        csd->flags |= CSD_FLAG_LOCK;
@@ -124,7 +123,7 @@ static void csd_lock(struct call_single_data *csd)
        smp_wmb();
 }
 
-static void csd_unlock(struct call_single_data *csd)
+static __always_inline void csd_unlock(struct call_single_data *csd)
 {
        WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
 
index c9956440d0e609c51e2032f6f625c4061ba764a4..21b81a41dae572a3263c7b731efa97a2d25872e9 100644 (file)
@@ -30,7 +30,7 @@
 struct trace_kprobe {
        struct list_head        list;
        struct kretprobe        rp;     /* Use rp.kp for kprobe use */
-       unsigned long           nhit;
+       unsigned long __percpu *nhit;
        const char              *symbol;        /* symbol name */
        struct trace_probe      tp;
 };
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
        if (!tk)
                return ERR_PTR(ret);
 
+       tk->nhit = alloc_percpu(unsigned long);
+       if (!tk->nhit)
+               goto error;
+
        if (symbol) {
                tk->symbol = kstrdup(symbol, GFP_KERNEL);
                if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
 error:
        kfree(tk->tp.call.name);
        kfree(tk->symbol);
+       free_percpu(tk->nhit);
        kfree(tk);
        return ERR_PTR(ret);
 }
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
        kfree(tk->tp.call.class->system);
        kfree(tk->tp.call.name);
        kfree(tk->symbol);
+       free_percpu(tk->nhit);
        kfree(tk);
 }
 
@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
        struct trace_kprobe *tk = v;
+       unsigned long nhit = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               nhit += *per_cpu_ptr(tk->nhit, cpu);
 
        seq_printf(m, "  %-44s %15lu %15lu\n",
-                  trace_event_name(&tk->tp.call), tk->nhit,
+                  trace_event_name(&tk->tp.call), nhit,
                   tk->rp.kp.nmissed);
 
        return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
 
-       tk->nhit++;
+       raw_cpu_inc(*tk->nhit);
 
        if (tk->tp.flags & TP_FLAG_TRACE)
                kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
        struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
 
-       tk->nhit++;
+       raw_cpu_inc(*tk->nhit);
 
        if (tk->tp.flags & TP_FLAG_TRACE)
                kretprobe_trace_func(tk, ri, regs);
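
The hit counter becomes a per-CPU variable so the probe fast path only increments a local counter, and the totals are folded together when the profile file is read. Abbreviated from the hunks above, the kernel-style pattern is:

unsigned long __percpu *nhit = alloc_percpu(unsigned long);

/* fast path: runs on every probe hit */
raw_cpu_inc(*nhit);

/* slow path: runs only when the statistics are read */
unsigned long total = 0;
int cpu;

for_each_possible_cpu(cpu)
        total += *per_cpu_ptr(nhit, cpu);

/* teardown */
free_percpu(nhit);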
index 0655afbea83f596ded953d6017e0f25a640bc073..d1663083d903aaad3ce4b359cfdb20068004b237 100644 (file)
@@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
 
 extern char *__bad_type_size(void);
 
-#define SYSCALL_FIELD(type, name)                                      \
-       sizeof(type) != sizeof(trace.name) ?                            \
+#define SYSCALL_FIELD(type, field, name)                               \
+       sizeof(type) != sizeof(trace.field) ?                           \
                __bad_type_size() :                                     \
-               #type, #name, offsetof(typeof(trace), name),            \
-               sizeof(trace.name), is_signed_type(type)
+               #type, #name, offsetof(typeof(trace), field),           \
+               sizeof(trace.field), is_signed_type(type)
 
 static int __init
 __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
@@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
        int i;
        int offset = offsetof(typeof(trace), args);
 
-       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+                                FILTER_OTHER);
        if (ret)
                return ret;
 
@@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call)
        struct syscall_trace_exit trace;
        int ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
+       ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
+                                FILTER_OTHER);
        if (ret)
                return ret;
 
-       ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
+       ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
                                 FILTER_OTHER);
 
        return ret;
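
SYSCALL_FIELD() now takes both a struct member ('field') and a user-visible name ('name'), so the exported field can be called __syscall_nr while the structure keeps its 'nr' member. The underlying trick, stringizing one argument while using another for offsetof()/sizeof(), is plain C; a compilable illustration with invented names:

#include <stdio.h>
#include <stddef.h>

struct trace_entry {
        int  nr;        /* the struct member keeps its short name... */
        long ret;
};

/* ...while the exported field name is chosen independently. */
#define FIELD(type, member, name)                               \
        #type, #name, offsetof(struct trace_entry, member),     \
        sizeof(((struct trace_entry *)0)->member)

int main(void)
{
        printf("%s %s at offset %zu, size %zu\n",
               FIELD(int, nr, __syscall_nr));
        return 0;
}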
index 5a70f6196f577a071ae0a31e9da7fa0e1dd1bc68..81dedaab36ccfed3150281a0f915a4070f667bfd 100644 (file)
@@ -41,6 +41,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
                        break;
        return i;
 }
+EXPORT_SYMBOL(cpumask_any_but);
 
 /* These are not inline because of header tangles. */
 #ifdef CONFIG_CPUMASK_OFFSTACK
index 3345a089ef7b954d475d67965b8ce891c1b16ab0..3859bf63561c63936947b007fe3ee20e822509a1 100644 (file)
 #include <linux/kernel.h>
 #include <linux/rculist.h>
 
-static struct list_head force_poison;
-void list_force_poison(struct list_head *entry)
-{
-       entry->next = &force_poison;
-       entry->prev = &force_poison;
-}
-
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -30,8 +23,6 @@ void __list_add(struct list_head *new,
                              struct list_head *prev,
                              struct list_head *next)
 {
-       WARN(new->next == &force_poison || new->prev == &force_poison,
-               "list_add attempted on force-poisoned entry\n");
        WARN(next->prev != prev,
                "list_add corruption. next->prev should be "
                "prev (%p), but was %p. (next=%p).\n",
index c61b299e367ffac86450ac59c123d0925285b96c..915d75df20864d07428376750c1bd148c216a585 100644 (file)
@@ -46,8 +46,11 @@ struct test_key {
        bool                    (*test_key)(void);
 };
 
-#define test_key_func(key, branch) \
-       ({bool func(void) { return branch(key); } func; })
+#define test_key_func(key, branch)     \
+static bool key ## _ ## branch(void)   \
+{                                      \
+       return branch(&key);            \
+}
 
 static void invert_key(struct static_key *key)
 {
@@ -92,6 +95,25 @@ static int verify_keys(struct test_key *keys, int size, bool invert)
        return 0;
 }
 
+test_key_func(old_true_key, static_key_true)
+test_key_func(old_false_key, static_key_false)
+test_key_func(true_key, static_branch_likely)
+test_key_func(true_key, static_branch_unlikely)
+test_key_func(false_key, static_branch_likely)
+test_key_func(false_key, static_branch_unlikely)
+test_key_func(base_old_true_key, static_key_true)
+test_key_func(base_inv_old_true_key, static_key_true)
+test_key_func(base_old_false_key, static_key_false)
+test_key_func(base_inv_old_false_key, static_key_false)
+test_key_func(base_true_key, static_branch_likely)
+test_key_func(base_true_key, static_branch_unlikely)
+test_key_func(base_inv_true_key, static_branch_likely)
+test_key_func(base_inv_true_key, static_branch_unlikely)
+test_key_func(base_false_key, static_branch_likely)
+test_key_func(base_false_key, static_branch_unlikely)
+test_key_func(base_inv_false_key, static_branch_likely)
+test_key_func(base_inv_false_key, static_branch_unlikely)
+
 static int __init test_static_key_init(void)
 {
        int ret;
@@ -102,95 +124,95 @@ static int __init test_static_key_init(void)
                {
                        .init_state     = true,
                        .key            = &old_true_key,
-                       .test_key       = test_key_func(&old_true_key, static_key_true),
+                       .test_key       = &old_true_key_static_key_true,
                },
                {
                        .init_state     = false,
                        .key            = &old_false_key,
-                       .test_key       = test_key_func(&old_false_key, static_key_false),
+                       .test_key       = &old_false_key_static_key_false,
                },
                /* internal keys - new keys */
                {
                        .init_state     = true,
                        .key            = &true_key.key,
-                       .test_key       = test_key_func(&true_key, static_branch_likely),
+                       .test_key       = &true_key_static_branch_likely,
                },
                {
                        .init_state     = true,
                        .key            = &true_key.key,
-                       .test_key       = test_key_func(&true_key, static_branch_unlikely),
+                       .test_key       = &true_key_static_branch_unlikely,
                },
                {
                        .init_state     = false,
                        .key            = &false_key.key,
-                       .test_key       = test_key_func(&false_key, static_branch_likely),
+                       .test_key       = &false_key_static_branch_likely,
                },
                {
                        .init_state     = false,
                        .key            = &false_key.key,
-                       .test_key       = test_key_func(&false_key, static_branch_unlikely),
+                       .test_key       = &false_key_static_branch_unlikely,
                },
                /* external keys - old keys */
                {
                        .init_state     = true,
                        .key            = &base_old_true_key,
-                       .test_key       = test_key_func(&base_old_true_key, static_key_true),
+                       .test_key       = &base_old_true_key_static_key_true,
                },
                {
                        .init_state     = false,
                        .key            = &base_inv_old_true_key,
-                       .test_key       = test_key_func(&base_inv_old_true_key, static_key_true),
+                       .test_key       = &base_inv_old_true_key_static_key_true,
                },
                {
                        .init_state     = false,
                        .key            = &base_old_false_key,
-                       .test_key       = test_key_func(&base_old_false_key, static_key_false),
+                       .test_key       = &base_old_false_key_static_key_false,
                },
                {
                        .init_state     = true,
                        .key            = &base_inv_old_false_key,
-                       .test_key       = test_key_func(&base_inv_old_false_key, static_key_false),
+                       .test_key       = &base_inv_old_false_key_static_key_false,
                },
                /* external keys - new keys */
                {
                        .init_state     = true,
                        .key            = &base_true_key.key,
-                       .test_key       = test_key_func(&base_true_key, static_branch_likely),
+                       .test_key       = &base_true_key_static_branch_likely,
                },
                {
                        .init_state     = true,
                        .key            = &base_true_key.key,
-                       .test_key       = test_key_func(&base_true_key, static_branch_unlikely),
+                       .test_key       = &base_true_key_static_branch_unlikely,
                },
                {
                        .init_state     = false,
                        .key            = &base_inv_true_key.key,
-                       .test_key       = test_key_func(&base_inv_true_key, static_branch_likely),
+                       .test_key       = &base_inv_true_key_static_branch_likely,
                },
                {
                        .init_state     = false,
                        .key            = &base_inv_true_key.key,
-                       .test_key       = test_key_func(&base_inv_true_key, static_branch_unlikely),
+                       .test_key       = &base_inv_true_key_static_branch_unlikely,
                },
                {
                        .init_state     = false,
                        .key            = &base_false_key.key,
-                       .test_key       = test_key_func(&base_false_key, static_branch_likely),
+                       .test_key       = &base_false_key_static_branch_likely,
                },
                {
                        .init_state     = false,
                        .key            = &base_false_key.key,
-                       .test_key       = test_key_func(&base_false_key, static_branch_unlikely),
+                       .test_key       = &base_false_key_static_branch_unlikely,
                },
                {
                        .init_state     = true,
                        .key            = &base_inv_false_key.key,
-                       .test_key       = test_key_func(&base_inv_false_key, static_branch_likely),
+                       .test_key       = &base_inv_false_key_static_branch_likely,
                },
                {
                        .init_state     = true,
                        .key            = &base_inv_false_key.key,
-                       .test_key       = test_key_func(&base_inv_false_key, static_branch_unlikely),
+                       .test_key       = &base_inv_false_key_static_branch_unlikely,
                },
        };
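
The rewrite above replaces the GCC nested-function trick ({bool func(void) { ... } func;}) with ordinary file-scope functions stamped out by token pasting, presumably to avoid taking the address of a nested function, which requires an executable-stack trampoline. A compilable sketch of the same macro pattern (key and predicate names invented, unrelated to the jump-label API):

    #include <stdio.h>
    #include <stdbool.h>

    static bool demo_key;

    /* One instantiation per (key, predicate) pair produces a named function. */
    #define DEFINE_KEY_TEST(key, pred)      \
    static bool key ## _ ## pred(void)      \
    {                                       \
            return pred(&key);              \
    }

    static bool is_set(bool *key)   { return *key; }
    static bool is_clear(bool *key) { return !*key; }

    DEFINE_KEY_TEST(demo_key, is_set)
    DEFINE_KEY_TEST(demo_key, is_clear)

    int main(void)
    {
            demo_key = true;
            printf("%d %d\n", demo_key_is_set(), demo_key_is_clear()); /* 1 0 */
            return 0;
    }
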
 
index 3461d97ecb30bfe18d1ed50729e5147aa2ad7e6d..da7a35d83de7edc4ed562caa5e1925194f2c7229 100644 (file)
@@ -195,6 +195,30 @@ void __delete_from_page_cache(struct page *page, void *shadow,
        else
                cleancache_invalidate_page(mapping, page);
 
+       VM_BUG_ON_PAGE(page_mapped(page), page);
+       if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
+               int mapcount;
+
+               pr_alert("BUG: Bad page cache in process %s  pfn:%05lx\n",
+                        current->comm, page_to_pfn(page));
+               dump_page(page, "still mapped when deleted");
+               dump_stack();
+               add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+
+               mapcount = page_mapcount(page);
+               if (mapping_exiting(mapping) &&
+                   page_count(page) >= mapcount + 2) {
+                       /*
+                        * All vmas have already been torn down, so it's
+                        * a good bet that actually the page is unmapped,
+                        * and we'd prefer not to leak it: if we're wrong,
+                        * some other bad page check should catch it later.
+                        */
+                       page_mapcount_reset(page);
+                       atomic_sub(mapcount, &page->_count);
+               }
+       }
+
        page_cache_tree_delete(mapping, page, shadow);
 
        page->mapping = NULL;
@@ -205,7 +229,6 @@ void __delete_from_page_cache(struct page *page, void *shadow,
                __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
-       VM_BUG_ON_PAGE(page_mapped(page), page);
 
        /*
         * At this point page must be either written or cleaned by truncate.
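
The hunk above moves the VM_BUG_ON_PAGE() check earlier and adds a non-debug fallback: when CONFIG_DEBUG_VM is off and a page is unexpectedly still mapped at deletion, the kernel now logs, taints, and attempts a best-effort repair instead of silently proceeding. A toy sketch of that debug-trap versus production-recover split (CONFIG_DEBUG_DEMO is a stand-in macro, not a real kconfig symbol):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    #ifndef CONFIG_DEBUG_DEMO
    #define CONFIG_DEBUG_DEMO 0    /* stand-in for CONFIG_DEBUG_VM */
    #endif

    static void delete_entry(bool still_mapped)
    {
            /* Debug builds trap immediately, like VM_BUG_ON_PAGE(). */
            assert(!(CONFIG_DEBUG_DEMO && still_mapped));

            /* Production builds log and attempt a best-effort repair. */
            if (!CONFIG_DEBUG_DEMO && still_mapped)
                    fprintf(stderr, "BUG: entry still mapped when deleted\n");
    }

    int main(void)
    {
            delete_entry(false);
            delete_entry(true);  /* warns, or aborts with -DCONFIG_DEBUG_DEMO=1 */
            return 0;
    }
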
index 01f2b48c8618a9f973eeb11f2162b75bb8cf67d4..aefba5a9cc47f74880dbbb3491a8927fcf999b06 100644 (file)
@@ -2751,7 +2751,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
        int ret;
 
        if (!hugepages_supported())
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
 
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
@@ -2792,7 +2792,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
        int ret;
 
        if (!hugepages_supported())
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
 
        tmp = h->nr_overcommit_huge_pages;
 
@@ -3502,7 +3502,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         * COW. Warn that such a situation has occurred as it may not be obvious
         */
        if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
-               pr_warning("PID %d killed due to inadequate hugepage pool\n",
+               pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
                           current->pid);
                return ret;
        }
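
ENOTSUPP (524) is a kernel-internal errno with no uapi definition, so returning it from a sysctl handler reaches userspace as an unrecognised error; EOPNOTSUPP is the standard value tools can decode. A quick userspace check of the difference:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            printf("EOPNOTSUPP (%d): %s\n", EOPNOTSUPP, strerror(EOPNOTSUPP));
            printf("524:             %s\n", strerror(524)); /* kernel-internal ENOTSUPP */
            return 0;
    }
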
index bc0a8d8b8f42faf7bca01501425ba73e156ee293..1ad20ade8c91328d2f47e8c116f9aca6bc2fb57f 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
+#include <linux/linkage.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -60,6 +61,25 @@ void kasan_unpoison_shadow(const void *address, size_t size)
        }
 }
 
+static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+{
+       void *base = task_stack_page(task);
+       size_t size = sp - base;
+
+       kasan_unpoison_shadow(base, size);
+}
+
+/* Unpoison the entire stack for a task. */
+void kasan_unpoison_task_stack(struct task_struct *task)
+{
+       __kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+}
+
+/* Unpoison the stack for the current task beyond a watermark sp value. */
+asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+{
+       __kasan_unpoison_stack(current, sp);
+}
 
 /*
  * All functions below always inlined so compiler could
index 4af58a3a8ffa345c16fe190be76ba9f9e83113d4..979b18cbd343e3422d5b4ccb86d1fab2bb1090e2 100644 (file)
@@ -138,7 +138,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
        res->name = "System RAM";
        res->start = start;
        res->end = start + size - 1;
-       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
        if (request_resource(&iomem_resource, res) < 0) {
                pr_debug("System RAM resource %pR cannot be added\n", res);
                kfree(res);
index 4c4187c0e1deeb25bdbf5eb383132d27feb9be90..9a3f6b90e6283b24bff091213768612b04b6d928 100644 (file)
@@ -532,7 +532,7 @@ retry:
                nid = page_to_nid(page);
                if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
                        continue;
-               if (PageTail(page) && PageAnon(page)) {
+               if (PageTransCompound(page) && PageAnon(page)) {
                        get_page(page);
                        pte_unmap_unlock(pte, ptl);
                        lock_page(page);
index 004d42b1dfaf928ab174e057696afa580447f3d9..7924f4f58a6d48ae5335fdedb1c2e2dd93a56d88 100644 (file)
@@ -135,8 +135,8 @@ static void *remove_element(mempool_t *pool)
        void *element = pool->elements[--pool->curr_nr];
 
        BUG_ON(pool->curr_nr < 0);
-       check_element(pool, element);
        kasan_unpoison_element(pool, element);
+       check_element(pool, element);
        return element;
 }
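
The reordering above matters under KASAN: check_element() reads the element's poison pattern, so the element must be unpoisoned first or the check itself triggers a use-after-poison report. A toy model of the ordering rule (the shadow flag stands in for KASAN's real shadow memory):

    #include <assert.h>
    #include <stddef.h>
    #include <string.h>

    #define POISON_BYTE 0x5a

    static unsigned char element[16];
    static int shadow_poisoned;    /* stand-in for the KASAN shadow state */

    static void unpoison(void)
    {
            shadow_poisoned = 0;
    }

    static void check_element_pattern(void)
    {
            size_t i;

            assert(!shadow_poisoned);     /* reading poisoned memory is a bug */
            for (i = 0; i < sizeof(element); i++)
                    assert(element[i] == POISON_BYTE);
    }

    int main(void)
    {
            memset(element, POISON_BYTE, sizeof(element));
            shadow_poisoned = 1;

            unpoison();                   /* must come first, as in the hunk above */
            check_element_pattern();
            return 0;
    }
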
 
index d154f0877fd806a93d234e6d4288da27d6c5d09f..7bfe9fa1c8dc6db8d6c4415fbf0d26e66477e42e 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/awk -f
 # extract linker version number from stdin and turn into single number
        {
-       gsub(".*)", "");
+       gsub(".*\\)", "");
        gsub(".*version ", "");
        gsub("-.*", "");
        split($1,a, ".");
index e9b98af6b52c83faecccd4cc1011286a75560849..e8da3b8ee7213352426023029fcff04e87659d50 100644 (file)
@@ -141,10 +141,8 @@ int pxa2xx_pcm_mmap(struct snd_pcm_substream *substream,
        struct vm_area_struct *vma)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
-       return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-                                    runtime->dma_area,
-                                    runtime->dma_addr,
-                                    runtime->dma_bytes);
+       return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+                          runtime->dma_addr, runtime->dma_bytes);
 }
 EXPORT_SYMBOL(pxa2xx_pcm_mmap);
 
@@ -156,8 +154,7 @@ int pxa2xx_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream)
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
        buf->dev.dev = pcm->card->dev;
        buf->private_data = NULL;
-       buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-                                          &buf->addr, GFP_KERNEL);
+       buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
        if (!buf->area)
                return -ENOMEM;
        buf->bytes = size;
@@ -178,8 +175,7 @@ void pxa2xx_pcm_free_dma_buffers(struct snd_pcm *pcm)
                buf = &substream->dma_buffer;
                if (!buf->area)
                        continue;
-               dma_free_writecombine(pcm->card->dev, buf->bytes,
-                                     buf->area, buf->addr);
+               dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
                buf->area = NULL;
        }
 }
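
The hunks above switch to the dma_alloc_wc()/dma_mmap_wc()/dma_free_wc() spellings, which are shorter wrappers around the writecombine helpers rather than a behaviour change. A self-contained illustration of the wrapper relationship (the demo_* functions are stand-ins, not the real dma-mapping.h API):

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-in for the long-named allocator. */
    static void *demo_alloc_writecombine(size_t size)
    {
            return calloc(1, size);
    }

    /* The short name simply forwards to it. */
    static inline void *demo_alloc_wc(size_t size)
    {
            return demo_alloc_writecombine(size);
    }

    int main(void)
    {
            void *buf = demo_alloc_wc(256);

            printf("allocated %p\n", buf);
            free(buf);
            return 0;
    }
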
index affb192238a403b88dc38f3e76e0c54440779ea0..faae6936bae4f55851e4f0a7062f03fdebf674ee 100644 (file)
@@ -1130,7 +1130,7 @@ static int sid_status_control_get(struct snd_kcontrol *kcontrol,
        struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
        mutex_lock(&drvdata->ctrl_lock);
-       ucontrol->value.integer.value[0] = drvdata->sid_status;
+       ucontrol->value.enumerated.item[0] = drvdata->sid_status;
        mutex_unlock(&drvdata->ctrl_lock);
 
        return 0;
@@ -1147,7 +1147,7 @@ static int sid_status_control_put(struct snd_kcontrol *kcontrol,
 
        dev_dbg(codec->dev, "%s: Enter\n", __func__);
 
-       if (ucontrol->value.integer.value[0] != SID_APPLY_FIR) {
+       if (ucontrol->value.enumerated.item[0] != SID_APPLY_FIR) {
                dev_err(codec->dev,
                        "%s: ERROR: This control supports '%s' only!\n",
                        __func__, enum_sid_state[SID_APPLY_FIR]);
@@ -1199,7 +1199,7 @@ static int anc_status_control_get(struct snd_kcontrol *kcontrol,
        struct ab8500_codec_drvdata *drvdata = dev_get_drvdata(codec->dev);
 
        mutex_lock(&drvdata->ctrl_lock);
-       ucontrol->value.integer.value[0] = drvdata->anc_status;
+       ucontrol->value.enumerated.item[0] = drvdata->anc_status;
        mutex_unlock(&drvdata->ctrl_lock);
 
        return 0;
@@ -1220,7 +1220,7 @@ static int anc_status_control_put(struct snd_kcontrol *kcontrol,
 
        mutex_lock(&drvdata->ctrl_lock);
 
-       req = ucontrol->value.integer.value[0];
+       req = ucontrol->value.enumerated.item[0];
        if (req >= ARRAY_SIZE(enum_anc_state)) {
                status = -EINVAL;
                goto cleanup;
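
This file and the many codec drivers below share one fix: enum controls must go through ucontrol->value.enumerated.item[] (an unsigned int array) rather than value.integer.value[] (a long array). The two overlay in a union, so using the wrong member reads or writes the wrong bytes, and the now-redundant negative-range checks can be dropped because item[] is unsigned. A compilable sketch of why the union member matters (layout simplified from the real snd_ctl_elem_value):

    #include <stdio.h>

    struct demo_ctl_value {
            union {
                    struct { long value[4]; } integer;
                    struct { unsigned int item[4]; } enumerated;
            } u;
    };

    int main(void)
    {
            struct demo_ctl_value v;

            v.u.enumerated.item[0] = 2;   /* what an enum control stores */
            v.u.enumerated.item[1] = 7;

            /* On LP64 a long read spans item[0] and item[1], so reading the
             * integer member returns a different number entirely. */
            printf("enum item[0] = %u, integer value[0] = %ld\n",
                   v.u.enumerated.item[0], v.u.integer.value[0]);
            return 0;
    }
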
index e13583e6ff56aa5d69a163ca7a447acdaa18cbd6..5ae87a084d97e24a6aa243ba6cb1afc45290ac43 100644 (file)
@@ -103,9 +103,9 @@ bool adau17x1_has_dsp(struct adau *adau);
 #define ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL BIT(3)
 #define ADAU17X1_CLOCK_CONTROL_SYSCLK_EN       BIT(0)
 
-#define ADAU17X1_SERIAL_PORT1_BCLK32           (0x0 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK48           (0x1 << 5)
-#define ADAU17X1_SERIAL_PORT1_BCLK64           (0x2 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK64           (0x0 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK32           (0x1 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK48           (0x2 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK128          (0x3 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK256          (0x4 << 5)
 #define ADAU17X1_SERIAL_PORT1_BCLK_MASK                (0x7 << 5)
index b3951524339f90cdf8f90dbc23cbadd46a874af9..35488f14e2378ada271328a97504a9558b746836 100644 (file)
@@ -60,15 +60,15 @@ static int cs42l51_get_chan_mix(struct snd_kcontrol *kcontrol,
        switch (value) {
        default:
        case 0:
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.enumerated.item[0] = 0;
                break;
        /* same value : (L+R)/2 and (R+L)/2 */
        case 1:
        case 2:
-               ucontrol->value.integer.value[0] = 1;
+               ucontrol->value.enumerated.item[0] = 1;
                break;
        case 3:
-               ucontrol->value.integer.value[0] = 2;
+               ucontrol->value.enumerated.item[0] = 2;
                break;
        }
 
@@ -85,7 +85,7 @@ static int cs42l51_set_chan_mix(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        unsigned char val;
 
-       switch (ucontrol->value.integer.value[0]) {
+       switch (ucontrol->value.enumerated.item[0]) {
        default:
        case 0:
                val = CHAN_MIX_NORMAL;
index 1d5a89c5164b85686f652dc8f5e13af6c1eb49e6..461506a4ca6a2ddb790b1646f27bce2d7abf25ba 100644 (file)
@@ -334,7 +334,7 @@ static int da732x_hpf_set(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct soc_enum *enum_ctrl = (struct soc_enum *)kcontrol->private_value;
        unsigned int reg = enum_ctrl->reg;
-       unsigned int sel = ucontrol->value.integer.value[0];
+       unsigned int sel = ucontrol->value.enumerated.item[0];
        unsigned int bits;
 
        switch (sel) {
@@ -368,13 +368,13 @@ static int da732x_hpf_get(struct snd_kcontrol *kcontrol,
 
        switch (val) {
        case DA732X_HPF_VOICE_EN:
-               ucontrol->value.integer.value[0] = DA732X_HPF_VOICE;
+               ucontrol->value.enumerated.item[0] = DA732X_HPF_VOICE;
                break;
        case DA732X_HPF_MUSIC_EN:
-               ucontrol->value.integer.value[0] = DA732X_HPF_MUSIC;
+               ucontrol->value.enumerated.item[0] = DA732X_HPF_MUSIC;
                break;
        default:
-               ucontrol->value.integer.value[0] = DA732X_HPF_DISABLED;
+               ucontrol->value.enumerated.item[0] = DA732X_HPF_DISABLED;
                break;
        }
 
index 20dcc496d39c9d8ac01e4d49004f92823fdd37a6..fc22804cabc5942acca6921c034fa98f234041dc 100644 (file)
@@ -1496,7 +1496,7 @@ static int max98088_put_eq_enum(struct snd_kcontrol *kcontrol,
        struct max98088_pdata *pdata = max98088->pdata;
        int channel = max98088_get_channel(codec, kcontrol->id.name);
        struct max98088_cdata *cdata;
-       int sel = ucontrol->value.integer.value[0];
+       int sel = ucontrol->value.enumerated.item[0];
 
        if (channel < 0)
               return channel;
index 1fedac50355e954790a15b84ed6aab49b2bb98e3..3577003f39cf8feaba0d97bb313e277b3b763ce7 100644 (file)
@@ -1499,7 +1499,7 @@ static int max98095_put_eq_enum(struct snd_kcontrol *kcontrol,
        struct max98095_pdata *pdata = max98095->pdata;
        int channel = max98095_get_eq_channel(kcontrol->id.name);
        struct max98095_cdata *cdata;
-       unsigned int sel = ucontrol->value.integer.value[0];
+       unsigned int sel = ucontrol->value.enumerated.item[0];
        struct max98095_eq_cfg *coef_set;
        int fs, best, best_val, i;
        int regmask, regsave;
@@ -1653,7 +1653,7 @@ static int max98095_put_bq_enum(struct snd_kcontrol *kcontrol,
        struct max98095_pdata *pdata = max98095->pdata;
        int channel = max98095_get_bq_channel(codec, kcontrol->id.name);
        struct max98095_cdata *cdata;
-       unsigned int sel = ucontrol->value.integer.value[0];
+       unsigned int sel = ucontrol->value.enumerated.item[0];
        struct max98095_biquad_cfg *coef_set;
        int fs, best, best_val, i;
        int regmask, regsave;
index 781398fb2841565f20d2f7e7a4f893d840e3e7ae..f7a6ce7e5fb12b7ab183428b28d6c69409676d08 100644 (file)
@@ -446,7 +446,7 @@ static int dac33_get_fifo_mode(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.integer.value[0] = dac33->fifo_mode;
+       ucontrol->value.enumerated.item[0] = dac33->fifo_mode;
 
        return 0;
 }
@@ -458,17 +458,16 @@ static int dac33_set_fifo_mode(struct snd_kcontrol *kcontrol,
        struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec);
        int ret = 0;
 
-       if (dac33->fifo_mode == ucontrol->value.integer.value[0])
+       if (dac33->fifo_mode == ucontrol->value.enumerated.item[0])
                return 0;
        /* Do not allow changes while stream is running*/
        if (snd_soc_codec_is_active(codec))
                return -EPERM;
 
-       if (ucontrol->value.integer.value[0] < 0 ||
-           ucontrol->value.integer.value[0] >= DAC33_FIFO_LAST_MODE)
+       if (ucontrol->value.enumerated.item[0] >= DAC33_FIFO_LAST_MODE)
                ret = -EINVAL;
        else
-               dac33->fifo_mode = ucontrol->value.integer.value[0];
+               dac33->fifo_mode = ucontrol->value.enumerated.item[0];
 
        return ret;
 }
index 7693c1129babf0e42c58b5ca93832052374cb426..1b79778098d29ffbb377efc24576825cdfe8038c 100644 (file)
@@ -175,7 +175,7 @@ static int snd_wl1273_get_audio_route(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.integer.value[0] = wl1273->mode;
+       ucontrol->value.enumerated.item[0] = wl1273->mode;
 
        return 0;
 }
@@ -193,18 +193,17 @@ static int snd_wl1273_set_audio_route(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wl1273_priv *wl1273 = snd_soc_codec_get_drvdata(codec);
 
-       if (wl1273->mode == ucontrol->value.integer.value[0])
+       if (wl1273->mode == ucontrol->value.enumerated.item[0])
                return 0;
 
        /* Do not allow changes while stream is running */
        if (snd_soc_codec_is_active(codec))
                return -EPERM;
 
-       if (ucontrol->value.integer.value[0] < 0 ||
-           ucontrol->value.integer.value[0] >=  ARRAY_SIZE(wl1273_audio_route))
+       if (ucontrol->value.enumerated.item[0] >=  ARRAY_SIZE(wl1273_audio_route))
                return -EINVAL;
 
-       wl1273->mode = ucontrol->value.integer.value[0];
+       wl1273->mode = ucontrol->value.enumerated.item[0];
 
        return 1;
 }
@@ -219,7 +218,7 @@ static int snd_wl1273_fm_audio_get(struct snd_kcontrol *kcontrol,
 
        dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-       ucontrol->value.integer.value[0] = wl1273->core->audio_mode;
+       ucontrol->value.enumerated.item[0] = wl1273->core->audio_mode;
 
        return 0;
 }
@@ -233,7 +232,7 @@ static int snd_wl1273_fm_audio_put(struct snd_kcontrol *kcontrol,
 
        dev_dbg(codec->dev, "%s: enter.\n", __func__);
 
-       val = ucontrol->value.integer.value[0];
+       val = ucontrol->value.enumerated.item[0];
        if (wl1273->core->audio_mode == val)
                return 0;
 
index 61299ca372ffc1fc3f83dd88fe823151da7b09b8..6f1024f48b193bc7d3127cc8b12d56d14124413b 100644 (file)
@@ -233,7 +233,7 @@ static int wm8753_get_dai(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.integer.value[0] = wm8753->dai_func;
+       ucontrol->value.enumerated.item[0] = wm8753->dai_func;
        return 0;
 }
 
@@ -244,7 +244,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol,
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
        u16 ioctl;
 
-       if (wm8753->dai_func == ucontrol->value.integer.value[0])
+       if (wm8753->dai_func == ucontrol->value.enumerated.item[0])
                return 0;
 
        if (snd_soc_codec_is_active(codec))
@@ -252,7 +252,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol,
 
        ioctl = snd_soc_read(codec, WM8753_IOCTL);
 
-       wm8753->dai_func = ucontrol->value.integer.value[0];
+       wm8753->dai_func = ucontrol->value.enumerated.item[0];
 
        if (((ioctl >> 2) & 0x3) == wm8753->dai_func)
                return 1;
index 8172e499e6ed18f33e6f94b2bec1a1647d1c1788..edd7a77091942cd7b873e9c193baed54dd6186c6 100644 (file)
@@ -396,7 +396,7 @@ static int wm8904_put_drc_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
        struct wm8904_pdata *pdata = wm8904->pdata;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
 
        if (value >= pdata->num_drc_cfgs)
                return -EINVAL;
@@ -467,7 +467,7 @@ static int wm8904_put_retune_mobile_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
        struct wm8904_pdata *pdata = wm8904->pdata;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
 
        if (value >= pdata->num_retune_mobile_cfgs)
                return -EINVAL;
index c799cca5abeb704645687b863ccedfc6749e9ac2..6b864c0fc2b676cc83f9d03d2218815e7e805919 100644 (file)
@@ -459,7 +459,7 @@ static int wm8958_put_mbc_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
        struct wm8994 *control = wm8994->wm8994;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
        int reg;
 
        /* Don't allow on the fly reconfiguration */
@@ -549,7 +549,7 @@ static int wm8958_put_vss_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
        struct wm8994 *control = wm8994->wm8994;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
        int reg;
 
        /* Don't allow on the fly reconfiguration */
@@ -582,7 +582,7 @@ static int wm8958_put_vss_hpf_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
        struct wm8994 *control = wm8994->wm8994;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
        int reg;
 
        /* Don't allow on the fly reconfiguration */
@@ -749,7 +749,7 @@ static int wm8958_put_enh_eq_enum(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
        struct wm8994 *control = wm8994->wm8994;
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
        int reg;
 
        /* Don't allow on the fly reconfiguration */
index 7350ff654bbf37fe660e2b7a75cc5a15b5d63c68..0c002a5712cb4f261cd84454f38e24a89836519a 100644 (file)
@@ -497,9 +497,9 @@ static int eqmode_get(struct snd_kcontrol *kcontrol,
 
        reg = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
        if (reg & WM8983_EQ3DMODE)
-               ucontrol->value.integer.value[0] = 1;
+               ucontrol->value.enumerated.item[0] = 1;
        else
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.enumerated.item[0] = 0;
 
        return 0;
 }
@@ -511,18 +511,18 @@ static int eqmode_put(struct snd_kcontrol *kcontrol,
        unsigned int regpwr2, regpwr3;
        unsigned int reg_eq;
 
-       if (ucontrol->value.integer.value[0] != 0
-           && ucontrol->value.integer.value[0] != 1)
+       if (ucontrol->value.enumerated.item[0] != 0
+           && ucontrol->value.enumerated.item[0] != 1)
                return -EINVAL;
 
        reg_eq = snd_soc_read(codec, WM8983_EQ1_LOW_SHELF);
        switch ((reg_eq & WM8983_EQ3DMODE) >> WM8983_EQ3DMODE_SHIFT) {
        case 0:
-               if (!ucontrol->value.integer.value[0])
+               if (!ucontrol->value.enumerated.item[0])
                        return 0;
                break;
        case 1:
-               if (ucontrol->value.integer.value[0])
+               if (ucontrol->value.enumerated.item[0])
                        return 0;
                break;
        }
@@ -537,7 +537,7 @@ static int eqmode_put(struct snd_kcontrol *kcontrol,
        /* set the desired eqmode */
        snd_soc_update_bits(codec, WM8983_EQ1_LOW_SHELF,
                            WM8983_EQ3DMODE_MASK,
-                           ucontrol->value.integer.value[0]
+                           ucontrol->value.enumerated.item[0]
                            << WM8983_EQ3DMODE_SHIFT);
        /* restore DAC/ADC configuration */
        snd_soc_write(codec, WM8983_POWER_MANAGEMENT_2, regpwr2);
index 9918152a03c7518e5c0c2e299a35145abd808a14..6ac76fe116b028db34c8cdbf3691babeb2a68844 100644 (file)
@@ -531,9 +531,9 @@ static int eqmode_get(struct snd_kcontrol *kcontrol,
 
        reg = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
        if (reg & WM8985_EQ3DMODE)
-               ucontrol->value.integer.value[0] = 1;
+               ucontrol->value.enumerated.item[0] = 1;
        else
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.enumerated.item[0] = 0;
 
        return 0;
 }
@@ -545,18 +545,18 @@ static int eqmode_put(struct snd_kcontrol *kcontrol,
        unsigned int regpwr2, regpwr3;
        unsigned int reg_eq;
 
-       if (ucontrol->value.integer.value[0] != 0
-                       && ucontrol->value.integer.value[0] != 1)
+       if (ucontrol->value.enumerated.item[0] != 0
+                       && ucontrol->value.enumerated.item[0] != 1)
                return -EINVAL;
 
        reg_eq = snd_soc_read(codec, WM8985_EQ1_LOW_SHELF);
        switch ((reg_eq & WM8985_EQ3DMODE) >> WM8985_EQ3DMODE_SHIFT) {
        case 0:
-               if (!ucontrol->value.integer.value[0])
+               if (!ucontrol->value.enumerated.item[0])
                        return 0;
                break;
        case 1:
-               if (ucontrol->value.integer.value[0])
+               if (ucontrol->value.enumerated.item[0])
                        return 0;
                break;
        }
@@ -573,7 +573,7 @@ static int eqmode_put(struct snd_kcontrol *kcontrol,
        /* set the desired eqmode */
        snd_soc_update_bits(codec, WM8985_EQ1_LOW_SHELF,
                            WM8985_EQ3DMODE_MASK,
-                           ucontrol->value.integer.value[0]
+                           ucontrol->value.enumerated.item[0]
                            << WM8985_EQ3DMODE_SHIFT);
        /* restore DAC/ADC configuration */
        snd_soc_write(codec, WM8985_POWER_MANAGEMENT_2, regpwr2);
index 2ccbb322df775b803d40fe765958f439d855d41f..a18aecb4993590e6a7bd936b238fee544462a6bc 100644 (file)
@@ -362,7 +362,7 @@ static int wm8994_put_drc_enum(struct snd_kcontrol *kcontrol,
        struct wm8994 *control = wm8994->wm8994;
        struct wm8994_pdata *pdata = &control->pdata;
        int drc = wm8994_get_drc(kcontrol->id.name);
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
 
        if (drc < 0)
                return drc;
@@ -469,7 +469,7 @@ static int wm8994_put_retune_mobile_enum(struct snd_kcontrol *kcontrol,
        struct wm8994 *control = wm8994->wm8994;
        struct wm8994_pdata *pdata = &control->pdata;
        int block = wm8994_get_retune_mobile_block(kcontrol->id.name);
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
 
        if (block < 0)
                return block;
index 8d7d6c01a2f7264905e7a8e29888a417f44fb515..f99b34f7647b092a657fbbc3a70fd01219837351 100644 (file)
@@ -416,7 +416,7 @@ static int wm8996_put_retune_mobile_enum(struct snd_kcontrol *kcontrol,
        struct wm8996_priv *wm8996 = snd_soc_codec_get_drvdata(codec);
        struct wm8996_pdata *pdata = &wm8996->pdata;
        int block = wm8996_get_retune_mobile_block(kcontrol->id.name);
-       int value = ucontrol->value.integer.value[0];
+       int value = ucontrol->value.enumerated.item[0];
 
        if (block < 0)
                return block;
index ccb3b15139ad9dacd41ad801201471d1ed696997..363b3b6676163e8c0b110efd165c5eccd7ec9e1a 100644 (file)
@@ -344,9 +344,9 @@ static int speaker_mode_get(struct snd_kcontrol *kcontrol,
 
        reg = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
        if (reg & WM9081_SPK_MODE)
-               ucontrol->value.integer.value[0] = 1;
+               ucontrol->value.enumerated.item[0] = 1;
        else
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.enumerated.item[0] = 0;
 
        return 0;
 }
@@ -365,7 +365,7 @@ static int speaker_mode_put(struct snd_kcontrol *kcontrol,
        unsigned int reg2 = snd_soc_read(codec, WM9081_ANALOGUE_SPEAKER_2);
 
        /* Are we changing anything? */
-       if (ucontrol->value.integer.value[0] ==
+       if (ucontrol->value.enumerated.item[0] ==
            ((reg2 & WM9081_SPK_MODE) != 0))
                return 0;
 
@@ -373,7 +373,7 @@ static int speaker_mode_put(struct snd_kcontrol *kcontrol,
        if (reg_pwr & WM9081_SPK_ENA)
                return -EINVAL;
 
-       if (ucontrol->value.integer.value[0]) {
+       if (ucontrol->value.enumerated.item[0]) {
                /* Class AB */
                reg2 &= ~(WM9081_SPK_INV_MUTE | WM9081_OUT_SPK_CTRL);
                reg2 |= WM9081_SPK_MODE;
index 79e143625ac3d9c14ca72f96dc11acb6a970efdb..9849643ef8099c7f3340a719cf160c8ce633f9c5 100644 (file)
@@ -1212,7 +1212,7 @@ static int wm9713_soc_probe(struct snd_soc_codec *codec)
        if (IS_ERR(wm9713->ac97))
                return PTR_ERR(wm9713->ac97);
 
-       regmap = devm_regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
+       regmap = regmap_init_ac97(wm9713->ac97, &wm9713_regmap_config);
        if (IS_ERR(regmap)) {
                snd_soc_free_ac97_codec(wm9713->ac97);
                return PTR_ERR(regmap);
index 33806d487b8ae00711dddd4ec400393dbff59b1f..b9195b9c2b05175278d4e779f7db87aad20d9f63 100644 (file)
@@ -586,7 +586,7 @@ static int wm_adsp_fw_get(struct snd_kcontrol *kcontrol,
        struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
        struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.integer.value[0] = dsp[e->shift_l].fw;
+       ucontrol->value.enumerated.item[0] = dsp[e->shift_l].fw;
 
        return 0;
 }
@@ -599,10 +599,10 @@ static int wm_adsp_fw_put(struct snd_kcontrol *kcontrol,
        struct wm_adsp *dsp = snd_soc_codec_get_drvdata(codec);
        int ret = 0;
 
-       if (ucontrol->value.integer.value[0] == dsp[e->shift_l].fw)
+       if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw)
                return 0;
 
-       if (ucontrol->value.integer.value[0] >= WM_ADSP_NUM_FW)
+       if (ucontrol->value.enumerated.item[0] >= WM_ADSP_NUM_FW)
                return -EINVAL;
 
        mutex_lock(&dsp[e->shift_l].pwr_lock);
@@ -610,7 +610,7 @@ static int wm_adsp_fw_put(struct snd_kcontrol *kcontrol,
        if (dsp[e->shift_l].running || dsp[e->shift_l].compr)
                ret = -EBUSY;
        else
-               dsp[e->shift_l].fw = ucontrol->value.integer.value[0];
+               dsp[e->shift_l].fw = ucontrol->value.enumerated.item[0];
 
        mutex_unlock(&dsp[e->shift_l].pwr_lock);
 
index ed8de1035cda159d0d186f2cded0fb7a97fbceb4..40dfd8a3648408a2cc76bb6e4c4c3872c8e28ac3 100644 (file)
@@ -112,6 +112,20 @@ struct fsl_ssi_rxtx_reg_val {
        struct fsl_ssi_reg_val tx;
 };
 
+static const struct reg_default fsl_ssi_reg_defaults[] = {
+       {CCSR_SSI_SCR,     0x00000000},
+       {CCSR_SSI_SIER,    0x00003003},
+       {CCSR_SSI_STCR,    0x00000200},
+       {CCSR_SSI_SRCR,    0x00000200},
+       {CCSR_SSI_STCCR,   0x00040000},
+       {CCSR_SSI_SRCCR,   0x00040000},
+       {CCSR_SSI_SACNT,   0x00000000},
+       {CCSR_SSI_STMSK,   0x00000000},
+       {CCSR_SSI_SRMSK,   0x00000000},
+       {CCSR_SSI_SACCEN,  0x00000000},
+       {CCSR_SSI_SACCDIS, 0x00000000},
+};
+
 static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg)
 {
        switch (reg) {
@@ -176,7 +190,8 @@ static const struct regmap_config fsl_ssi_regconfig = {
        .val_bits = 32,
        .reg_stride = 4,
        .val_format_endian = REGMAP_ENDIAN_NATIVE,
-       .num_reg_defaults_raw = CCSR_SSI_SACCDIS / sizeof(uint32_t) + 1,
+       .reg_defaults = fsl_ssi_reg_defaults,
+       .num_reg_defaults = ARRAY_SIZE(fsl_ssi_reg_defaults),
        .readable_reg = fsl_ssi_readable_reg,
        .volatile_reg = fsl_ssi_volatile_reg,
        .precious_reg = fsl_ssi_precious_reg,
@@ -186,7 +201,6 @@ static const struct regmap_config fsl_ssi_regconfig = {
 
 struct fsl_ssi_soc_data {
        bool imx;
-       bool imx21regs; /* imx21-class SSI - no SACC{ST,EN,DIS} regs */
        bool offline_config;
        u32 sisr_write_mask;
 };
@@ -289,7 +303,6 @@ static struct fsl_ssi_soc_data fsl_ssi_mpc8610 = {
 
 static struct fsl_ssi_soc_data fsl_ssi_imx21 = {
        .imx = true,
-       .imx21regs = true,
        .offline_config = true,
        .sisr_write_mask = 0,
 };
@@ -573,12 +586,8 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi_private *ssi_private)
         */
        regmap_write(regs, CCSR_SSI_SACNT,
                        CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV);
-
-       /* no SACC{ST,EN,DIS} regs on imx21-class SSI */
-       if (!ssi_private->soc->imx21regs) {
-               regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
-               regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
-       }
+       regmap_write(regs, CCSR_SSI_SACCDIS, 0xff);
+       regmap_write(regs, CCSR_SSI_SACCEN, 0x300);
 
        /*
         * Enable SSI, Transmit and Receive. AC97 has to communicate with the
@@ -1388,7 +1397,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
        struct resource *res;
        void __iomem *iomem;
        char name[64];
-       struct regmap_config regconfig = fsl_ssi_regconfig;
 
        of_id = of_match_device(fsl_ssi_ids, &pdev->dev);
        if (!of_id || !of_id->data)
@@ -1436,25 +1444,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                return PTR_ERR(iomem);
        ssi_private->ssi_phys = res->start;
 
-       if (ssi_private->soc->imx21regs) {
-               /*
-                * According to datasheet imx21-class SSI
-                * don't have SACC{ST,EN,DIS} regs.
-                */
-               regconfig.max_register = CCSR_SSI_SRMSK;
-               regconfig.num_reg_defaults_raw =
-                       CCSR_SSI_SRMSK / sizeof(uint32_t) + 1;
-       }
-
        ret = of_property_match_string(np, "clock-names", "ipg");
        if (ret < 0) {
                ssi_private->has_ipg_clk_name = false;
                ssi_private->regs = devm_regmap_init_mmio(&pdev->dev, iomem,
-                       &regconfig);
+                       &fsl_ssi_regconfig);
        } else {
                ssi_private->has_ipg_clk_name = true;
                ssi_private->regs = devm_regmap_init_mmio_clk(&pdev->dev,
-                       "ipg", iomem, &regconfig);
+                       "ipg", iomem, &fsl_ssi_regconfig);
        }
        if (IS_ERR(ssi_private->regs)) {
                dev_err(&pdev->dev, "Failed to init register map\n");
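
The fsl_ssi hunks above replace num_reg_defaults_raw with an explicit reg_defaults table, presumably so the regmap cache starts from the listed power-on values instead of being derived from the register range, and the imx21-specific special-casing of the SACC registers is dropped with it. A standalone sketch of a defaults table of the same shape (register addresses and values invented, not the CCSR_SSI_* set):

    #include <stddef.h>
    #include <stdio.h>

    /* Same shape as the kernel's struct reg_default. */
    struct demo_reg_default {
            unsigned int reg;
            unsigned int def;
    };

    static const struct demo_reg_default demo_defaults[] = {
            { 0x00, 0x00000000 },
            { 0x04, 0x00003003 },
            { 0x08, 0x00000200 },
    };

    static unsigned int demo_lookup_default(unsigned int reg)
    {
            size_t i;

            for (i = 0; i < sizeof(demo_defaults) / sizeof(demo_defaults[0]); i++)
                    if (demo_defaults[i].reg == reg)
                            return demo_defaults[i].def;
            return 0;
    }

    int main(void)
    {
            printf("default for 0x04 = 0x%08x\n", demo_lookup_default(0x04));
            return 0;
    }
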
index 49d7513f429e5424ee3ea081178e087368e74896..e63cd5ecfd8feacd396f835756af90107fd2e2a6 100644 (file)
@@ -217,8 +217,8 @@ static int snd_imx_pcm_mmap(struct snd_pcm_substream *substream,
        struct snd_pcm_runtime *runtime = substream->runtime;
        int ret;
 
-       ret = dma_mmap_writecombine(substream->pcm->card->dev, vma,
-               runtime->dma_area, runtime->dma_addr, runtime->dma_bytes);
+       ret = dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+                         runtime->dma_addr, runtime->dma_bytes);
 
        pr_debug("%s: ret: %d %p %pad 0x%08x\n", __func__, ret,
                        runtime->dma_area,
@@ -247,8 +247,7 @@ static int imx_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream)
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
        buf->dev.dev = pcm->card->dev;
        buf->private_data = NULL;
-       buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-                                          &buf->addr, GFP_KERNEL);
+       buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
        if (!buf->area)
                return -ENOMEM;
        buf->bytes = size;
@@ -330,8 +329,7 @@ static void imx_pcm_free(struct snd_pcm *pcm)
                if (!buf->area)
                        continue;
 
-               dma_free_writecombine(pcm->card->dev, buf->bytes,
-                                     buf->area, buf->addr);
+               dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
                buf->area = NULL;
        }
 }
index 2d3afddb0a2e8c83fef54b36eed42d7ff6b69878..a7b96a9a4e0ecb57876413c848cfa91359d42a92 100644 (file)
@@ -367,8 +367,12 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
        }
        card->dev = &pdev->dev;
        sprintf(codec_name, "i2c-%s:00", drv->acpi_card->codec_id);
+
        /* set correct codec name */
-       strcpy((char *)card->dai_link[2].codec_name, codec_name);
+       for (i = 0; i < ARRAY_SIZE(cht_dailink); i++)
+               if (!strcmp(card->dai_link[i].codec_name, "i2c-10EC5645:00"))
+                       card->dai_link[i].codec_name = kstrdup(codec_name, GFP_KERNEL);
+
        snd_soc_card_set_drvdata(card, drv);
        ret_val = devm_snd_soc_register_card(&pdev->dev, card);
        if (ret_val) {
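
The probe fix above stops strcpy()ing over a const codec_name at a hard-coded dai_link index and instead walks the link table, replacing every reference to the default "i2c-10EC5645:00" codec with a kstrdup()'d name built from the ACPI id. A userspace analogue of that replace-the-pointer approach (the struct and the substituted id are placeholders for the example):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct demo_dai_link {
            const char *codec_name;
    };

    int main(void)
    {
            struct demo_dai_link links[] = {
                    { "i2c-10EC5645:00" },
                    { "some-other-codec" },
                    { "i2c-10EC5645:00" },
            };
            char fixed[32];
            size_t i;

            snprintf(fixed, sizeof(fixed), "i2c-%s:00", "10EC5650");

            for (i = 0; i < sizeof(links) / sizeof(links[0]); i++)
                    if (!strcmp(links[i].codec_name, "i2c-10EC5645:00"))
                            links[i].codec_name = strdup(fixed); /* kstrdup analogue */

            for (i = 0; i < sizeof(links) / sizeof(links[0]); i++)
                    printf("%s\n", links[i].codec_name);
            return 0;
    }
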
index 49c09a0add797ff95101bee1923b62254e3928a0..34f46c72a0e27cff8cd37108d27b71029cd088c6 100644 (file)
@@ -94,7 +94,7 @@ static const struct soc_enum lo_enum =
 static int headset_get_switch(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = hs_switch;
+       ucontrol->value.enumerated.item[0] = hs_switch;
        return 0;
 }
 
@@ -104,12 +104,12 @@ static int headset_set_switch(struct snd_kcontrol *kcontrol,
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
        struct snd_soc_dapm_context *dapm = &card->dapm;
 
-       if (ucontrol->value.integer.value[0] == hs_switch)
+       if (ucontrol->value.enumerated.item[0] == hs_switch)
                return 0;
 
        snd_soc_dapm_mutex_lock(dapm);
 
-       if (ucontrol->value.integer.value[0]) {
+       if (ucontrol->value.enumerated.item[0]) {
                pr_debug("hs_set HS path\n");
                snd_soc_dapm_enable_pin_unlocked(dapm, "Headphones");
                snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT");
@@ -123,7 +123,7 @@ static int headset_set_switch(struct snd_kcontrol *kcontrol,
 
        snd_soc_dapm_mutex_unlock(dapm);
 
-       hs_switch = ucontrol->value.integer.value[0];
+       hs_switch = ucontrol->value.enumerated.item[0];
 
        return 0;
 }
@@ -148,7 +148,7 @@ static void lo_enable_out_pins(struct snd_soc_dapm_context *dapm)
 static int lo_get_switch(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = lo_dac;
+       ucontrol->value.enumerated.item[0] = lo_dac;
        return 0;
 }
 
@@ -158,7 +158,7 @@ static int lo_set_switch(struct snd_kcontrol *kcontrol,
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
        struct snd_soc_dapm_context *dapm = &card->dapm;
 
-       if (ucontrol->value.integer.value[0] == lo_dac)
+       if (ucontrol->value.enumerated.item[0] == lo_dac)
                return 0;
 
        snd_soc_dapm_mutex_lock(dapm);
@@ -168,7 +168,7 @@ static int lo_set_switch(struct snd_kcontrol *kcontrol,
         */
        lo_enable_out_pins(dapm);
 
-       switch (ucontrol->value.integer.value[0]) {
+       switch (ucontrol->value.enumerated.item[0]) {
        case 0:
                pr_debug("set vibra path\n");
                snd_soc_dapm_disable_pin_unlocked(dapm, "VIB1OUT");
@@ -202,7 +202,7 @@ static int lo_set_switch(struct snd_kcontrol *kcontrol,
 
        snd_soc_dapm_mutex_unlock(dapm);
 
-       lo_dac = ucontrol->value.integer.value[0];
+       lo_dac = ucontrol->value.enumerated.item[0];
        return 0;
 }
 
index a294fee431f07363f965a81b4c9ef42eb3a42f58..5a4837dcfce3e615eedf47049ac3923332c8a9bf 100644 (file)
@@ -978,7 +978,7 @@ static int skl_tplg_tlv_control_set(struct snd_kcontrol *kcontrol,
                                return -EFAULT;
                } else {
                        if (copy_from_user(ac->params,
-                                          data + 2 * sizeof(u32), size))
+                                          data + 2, size))
                                return -EFAULT;
                }
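
The one-liner above fixes scaled pointer arithmetic: assuming data points at 32-bit words (as the replacement implies), data + 2 already skips the two-word header, while data + 2 * sizeof(u32) skips eight words. A quick demonstration of the scaling:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t buf[64];
            uint32_t *p = buf;

            printf("p + 2                skips %td bytes\n",
                   (char *)(p + 2) - (char *)p);                     /* 8  */
            printf("p + 2 * sizeof(u32)  skips %td bytes\n",
                   (char *)(p + 2 * sizeof(uint32_t)) - (char *)p);  /* 32 */
            return 0;
    }
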
 
index e09326158bc2dd0be61ea69f375f37dc6f0cdbe5..2cca055fd806cdb80e7226557af943533680c2f8 100644 (file)
@@ -267,10 +267,8 @@ static int nuc900_dma_mmap(struct snd_pcm_substream *substream,
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
 
-       return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-                                       runtime->dma_area,
-                                       runtime->dma_addr,
-                                       runtime->dma_bytes);
+       return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+                          runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops nuc900_dma_ops = {
index 190f868e78b24af37d8442d378c06aeb8ec6aaf2..fdecb70431745bf23c7113bf7abcede1cd5306ef 100644 (file)
@@ -133,7 +133,7 @@ static struct snd_soc_ops n810_ops = {
 static int n810_get_spk(struct snd_kcontrol *kcontrol,
                        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = n810_spk_func;
+       ucontrol->value.enumerated.item[0] = n810_spk_func;
 
        return 0;
 }
@@ -143,10 +143,10 @@ static int n810_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (n810_spk_func == ucontrol->value.integer.value[0])
+       if (n810_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       n810_spk_func = ucontrol->value.integer.value[0];
+       n810_spk_func = ucontrol->value.enumerated.item[0];
        n810_ext_control(&card->dapm);
 
        return 1;
@@ -155,7 +155,7 @@ static int n810_set_spk(struct snd_kcontrol *kcontrol,
 static int n810_get_jack(struct snd_kcontrol *kcontrol,
                         struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = n810_jack_func;
+       ucontrol->value.enumerated.item[0] = n810_jack_func;
 
        return 0;
 }
@@ -165,10 +165,10 @@ static int n810_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (n810_jack_func == ucontrol->value.integer.value[0])
+       if (n810_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       n810_jack_func = ucontrol->value.integer.value[0];
+       n810_jack_func = ucontrol->value.enumerated.item[0];
        n810_ext_control(&card->dapm);
 
        return 1;
@@ -177,7 +177,7 @@ static int n810_set_jack(struct snd_kcontrol *kcontrol,
 static int n810_get_input(struct snd_kcontrol *kcontrol,
                          struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = n810_dmic_func;
+       ucontrol->value.enumerated.item[0] = n810_dmic_func;
 
        return 0;
 }
@@ -187,10 +187,10 @@ static int n810_set_input(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (n810_dmic_func == ucontrol->value.integer.value[0])
+       if (n810_dmic_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       n810_dmic_func = ucontrol->value.integer.value[0];
+       n810_dmic_func = ucontrol->value.enumerated.item[0];
        n810_ext_control(&card->dapm);
 
        return 1;
index 6bb623a2a4dfcfa3cda744ca8bef18575baec2d9..99381a27295bbbd9452f0d0050c1509651373bff 100644 (file)
@@ -156,10 +156,8 @@ static int omap_pcm_mmap(struct snd_pcm_substream *substream,
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
 
-       return dma_mmap_writecombine(substream->pcm->card->dev, vma,
-                                    runtime->dma_area,
-                                    runtime->dma_addr,
-                                    runtime->dma_bytes);
+       return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
+                          runtime->dma_addr, runtime->dma_bytes);
 }
 
 static struct snd_pcm_ops omap_pcm_ops = {
@@ -183,8 +181,7 @@ static int omap_pcm_preallocate_dma_buffer(struct snd_pcm *pcm,
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
        buf->dev.dev = pcm->card->dev;
        buf->private_data = NULL;
-       buf->area = dma_alloc_writecombine(pcm->card->dev, size,
-                                          &buf->addr, GFP_KERNEL);
+       buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
        if (!buf->area)
                return -ENOMEM;
 
@@ -207,8 +204,7 @@ static void omap_pcm_free_dma_buffers(struct snd_pcm *pcm)
                if (!buf->area)
                        continue;
 
-               dma_free_writecombine(pcm->card->dev, buf->bytes,
-                                     buf->area, buf->addr);
+               dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
                buf->area = NULL;
        }
 }
index 5e21f08579d804c5f7895a22e87de6cdff8b79ef..54949242bc7075587e6a73a7235049db2e108734 100644 (file)
@@ -132,7 +132,7 @@ static struct snd_soc_ops rx51_ops = {
 static int rx51_get_spk(struct snd_kcontrol *kcontrol,
                        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = rx51_spk_func;
+       ucontrol->value.enumerated.item[0] = rx51_spk_func;
 
        return 0;
 }
@@ -142,10 +142,10 @@ static int rx51_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (rx51_spk_func == ucontrol->value.integer.value[0])
+       if (rx51_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       rx51_spk_func = ucontrol->value.integer.value[0];
+       rx51_spk_func = ucontrol->value.enumerated.item[0];
        rx51_ext_control(&card->dapm);
 
        return 1;
@@ -180,7 +180,7 @@ static int rx51_hp_event(struct snd_soc_dapm_widget *w,
 static int rx51_get_input(struct snd_kcontrol *kcontrol,
                          struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = rx51_dmic_func;
+       ucontrol->value.enumerated.item[0] = rx51_dmic_func;
 
        return 0;
 }
@@ -190,10 +190,10 @@ static int rx51_set_input(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (rx51_dmic_func == ucontrol->value.integer.value[0])
+       if (rx51_dmic_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       rx51_dmic_func = ucontrol->value.integer.value[0];
+       rx51_dmic_func = ucontrol->value.enumerated.item[0];
        rx51_ext_control(&card->dapm);
 
        return 1;
@@ -202,7 +202,7 @@ static int rx51_set_input(struct snd_kcontrol *kcontrol,
 static int rx51_get_jack(struct snd_kcontrol *kcontrol,
                         struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = rx51_jack_func;
+       ucontrol->value.enumerated.item[0] = rx51_jack_func;
 
        return 0;
 }
@@ -212,10 +212,10 @@ static int rx51_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (rx51_jack_func == ucontrol->value.integer.value[0])
+       if (rx51_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       rx51_jack_func = ucontrol->value.integer.value[0];
+       rx51_jack_func = ucontrol->value.enumerated.item[0];
        rx51_ext_control(&card->dapm);
 
        return 1;
index c97dc13d36087628433de516a1b2217df02b492b..dcbb7aa9830c0347af6f7bd2c198639e52d9e24e 100644 (file)
@@ -163,7 +163,7 @@ static struct snd_soc_ops corgi_ops = {
 static int corgi_get_jack(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = corgi_jack_func;
+       ucontrol->value.enumerated.item[0] = corgi_jack_func;
        return 0;
 }
 
@@ -172,10 +172,10 @@ static int corgi_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (corgi_jack_func == ucontrol->value.integer.value[0])
+       if (corgi_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       corgi_jack_func = ucontrol->value.integer.value[0];
+       corgi_jack_func = ucontrol->value.enumerated.item[0];
        corgi_ext_control(&card->dapm);
        return 1;
 }
@@ -183,7 +183,7 @@ static int corgi_set_jack(struct snd_kcontrol *kcontrol,
 static int corgi_get_spk(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = corgi_spk_func;
+       ucontrol->value.enumerated.item[0] = corgi_spk_func;
        return 0;
 }
 
@@ -192,10 +192,10 @@ static int corgi_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (corgi_spk_func == ucontrol->value.integer.value[0])
+       if (corgi_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       corgi_spk_func = ucontrol->value.integer.value[0];
+       corgi_spk_func = ucontrol->value.enumerated.item[0];
        corgi_ext_control(&card->dapm);
        return 1;
 }
index 241d0be42d7a0c6779bb6459c5165728cbedb302..62b8377a9d2b93c81f06a8df4c0b5ed2cd7a567f 100644 (file)
@@ -308,17 +308,17 @@ static int magician_set_spk(struct snd_kcontrol *kcontrol,
 static int magician_get_input(struct snd_kcontrol *kcontrol,
                              struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = magician_in_sel;
+       ucontrol->value.enumerated.item[0] = magician_in_sel;
        return 0;
 }
 
 static int magician_set_input(struct snd_kcontrol *kcontrol,
                              struct snd_ctl_elem_value *ucontrol)
 {
-       if (magician_in_sel == ucontrol->value.integer.value[0])
+       if (magician_in_sel == ucontrol->value.enumerated.item[0])
                return 0;
 
-       magician_in_sel = ucontrol->value.integer.value[0];
+       magician_in_sel = ucontrol->value.enumerated.item[0];
 
        switch (magician_in_sel) {
        case MAGICIAN_MIC:
index 84d0e2e508088a260b54911bca03236a50046203..4b3b714f5ee7a67e5538b191a9da99bac86e56f8 100644 (file)
@@ -138,7 +138,7 @@ static struct snd_soc_ops poodle_ops = {
 static int poodle_get_jack(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = poodle_jack_func;
+       ucontrol->value.enumerated.item[0] = poodle_jack_func;
        return 0;
 }
 
@@ -147,10 +147,10 @@ static int poodle_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (poodle_jack_func == ucontrol->value.integer.value[0])
+       if (poodle_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       poodle_jack_func = ucontrol->value.integer.value[0];
+       poodle_jack_func = ucontrol->value.enumerated.item[0];
        poodle_ext_control(&card->dapm);
        return 1;
 }
@@ -158,7 +158,7 @@ static int poodle_set_jack(struct snd_kcontrol *kcontrol,
 static int poodle_get_spk(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = poodle_spk_func;
+       ucontrol->value.enumerated.item[0] = poodle_spk_func;
        return 0;
 }
 
@@ -167,10 +167,10 @@ static int poodle_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card =  snd_kcontrol_chip(kcontrol);
 
-       if (poodle_spk_func == ucontrol->value.integer.value[0])
+       if (poodle_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       poodle_spk_func = ucontrol->value.integer.value[0];
+       poodle_spk_func = ucontrol->value.enumerated.item[0];
        poodle_ext_control(&card->dapm);
        return 1;
 }
index b00222620fd01be5cf00d4118d44f18832ad3bdc..0e02634c8b7f6f41764b75344ae850ff7ce586d7 100644 (file)
@@ -164,7 +164,7 @@ static struct snd_soc_ops spitz_ops = {
 static int spitz_get_jack(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = spitz_jack_func;
+       ucontrol->value.enumerated.item[0] = spitz_jack_func;
        return 0;
 }
 
@@ -173,10 +173,10 @@ static int spitz_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (spitz_jack_func == ucontrol->value.integer.value[0])
+       if (spitz_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       spitz_jack_func = ucontrol->value.integer.value[0];
+       spitz_jack_func = ucontrol->value.enumerated.item[0];
        spitz_ext_control(&card->dapm);
        return 1;
 }
@@ -184,7 +184,7 @@ static int spitz_set_jack(struct snd_kcontrol *kcontrol,
 static int spitz_get_spk(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = spitz_spk_func;
+       ucontrol->value.enumerated.item[0] = spitz_spk_func;
        return 0;
 }
 
@@ -193,10 +193,10 @@ static int spitz_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (spitz_spk_func == ucontrol->value.integer.value[0])
+       if (spitz_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       spitz_spk_func = ucontrol->value.integer.value[0];
+       spitz_spk_func = ucontrol->value.enumerated.item[0];
        spitz_ext_control(&card->dapm);
        return 1;
 }
index 49518dd642aa18ffd3f0004fe2f425210e820b54..c508f024ecfbc206887aeb9f91943311a279aed7 100644 (file)
@@ -95,7 +95,7 @@ static struct snd_soc_ops tosa_ops = {
 static int tosa_get_jack(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = tosa_jack_func;
+       ucontrol->value.enumerated.item[0] = tosa_jack_func;
        return 0;
 }
 
@@ -104,10 +104,10 @@ static int tosa_set_jack(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (tosa_jack_func == ucontrol->value.integer.value[0])
+       if (tosa_jack_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       tosa_jack_func = ucontrol->value.integer.value[0];
+       tosa_jack_func = ucontrol->value.enumerated.item[0];
        tosa_ext_control(&card->dapm);
        return 1;
 }
@@ -115,7 +115,7 @@ static int tosa_set_jack(struct snd_kcontrol *kcontrol,
 static int tosa_get_spk(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
-       ucontrol->value.integer.value[0] = tosa_spk_func;
+       ucontrol->value.enumerated.item[0] = tosa_spk_func;
        return 0;
 }
 
@@ -124,10 +124,10 @@ static int tosa_set_spk(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_card *card = snd_kcontrol_chip(kcontrol);
 
-       if (tosa_spk_func == ucontrol->value.integer.value[0])
+       if (tosa_spk_func == ucontrol->value.enumerated.item[0])
                return 0;
 
-       tosa_spk_func = ucontrol->value.integer.value[0];
+       tosa_spk_func = ucontrol->value.enumerated.item[0];
        tosa_ext_control(&card->dapm);
        return 1;
 }
index 00b6c9d039cfada651dad6e917be9318dc4ce762..e5101e0d2d372262f8ecf7d0498240cba971b86f 100644 (file)
@@ -355,7 +355,6 @@ static struct regmap_config lpass_cpu_regmap_config = {
        .readable_reg = lpass_cpu_regmap_readable,
        .volatile_reg = lpass_cpu_regmap_volatile,
        .cache_type = REGCACHE_FLAT,
-       .val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
 int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)
index 84d9e77c0fbe1e8d5e737d26beb492c3ce85883b..70a2559b63f9050bcbb86049717518d022463e2f 100644 (file)
@@ -481,10 +481,11 @@ static int i2s_set_sysclk(struct snd_soc_dai *dai,
        unsigned int cdcon_mask = 1 << i2s_regs->cdclkcon_off;
        unsigned int rsrc_mask = 1 << i2s_regs->rclksrc_off;
        u32 mod, mask, val = 0;
+       unsigned long flags;
 
-       spin_lock(i2s->lock);
+       spin_lock_irqsave(i2s->lock, flags);
        mod = readl(i2s->addr + I2SMOD);
-       spin_unlock(i2s->lock);
+       spin_unlock_irqrestore(i2s->lock, flags);
 
        switch (clk_id) {
        case SAMSUNG_I2S_OPCLK:
@@ -575,11 +576,11 @@ static int i2s_set_sysclk(struct snd_soc_dai *dai,
                return -EINVAL;
        }
 
-       spin_lock(i2s->lock);
+       spin_lock_irqsave(i2s->lock, flags);
        mod = readl(i2s->addr + I2SMOD);
        mod = (mod & ~mask) | val;
        writel(mod, i2s->addr + I2SMOD);
-       spin_unlock(i2s->lock);
+       spin_unlock_irqrestore(i2s->lock, flags);
 
        return 0;
 }
@@ -590,6 +591,7 @@ static int i2s_set_fmt(struct snd_soc_dai *dai,
        struct i2s_dai *i2s = to_info(dai);
        int lrp_shift, sdf_shift, sdf_mask, lrp_rlow, mod_slave;
        u32 mod, tmp = 0;
+       unsigned long flags;
 
        lrp_shift = i2s->variant_regs->lrp_off;
        sdf_shift = i2s->variant_regs->sdf_off;
@@ -649,7 +651,7 @@ static int i2s_set_fmt(struct snd_soc_dai *dai,
                return -EINVAL;
        }
 
-       spin_lock(i2s->lock);
+       spin_lock_irqsave(i2s->lock, flags);
        mod = readl(i2s->addr + I2SMOD);
        /*
         * Don't change the I2S mode if any controller is active on this
@@ -657,7 +659,7 @@ static int i2s_set_fmt(struct snd_soc_dai *dai,
         */
        if (any_active(i2s) &&
                ((mod & (sdf_mask | lrp_rlow | mod_slave)) != tmp)) {
-               spin_unlock(i2s->lock);
+               spin_unlock_irqrestore(i2s->lock, flags);
                dev_err(&i2s->pdev->dev,
                                "%s:%d Other DAI busy\n", __func__, __LINE__);
                return -EAGAIN;
@@ -666,7 +668,7 @@ static int i2s_set_fmt(struct snd_soc_dai *dai,
        mod &= ~(sdf_mask | lrp_rlow | mod_slave);
        mod |= tmp;
        writel(mod, i2s->addr + I2SMOD);
-       spin_unlock(i2s->lock);
+       spin_unlock_irqrestore(i2s->lock, flags);
 
        return 0;
 }
@@ -676,6 +678,7 @@ static int i2s_hw_params(struct snd_pcm_substream *substream,
 {
        struct i2s_dai *i2s = to_info(dai);
        u32 mod, mask = 0, val = 0;
+       unsigned long flags;
 
        if (!is_secondary(i2s))
                mask |= (MOD_DC2_EN | MOD_DC1_EN);
@@ -744,11 +747,11 @@ static int i2s_hw_params(struct snd_pcm_substream *substream,
                return -EINVAL;
        }
 
-       spin_lock(i2s->lock);
+       spin_lock_irqsave(i2s->lock, flags);
        mod = readl(i2s->addr + I2SMOD);
        mod = (mod & ~mask) | val;
        writel(mod, i2s->addr + I2SMOD);
-       spin_unlock(i2s->lock);
+       spin_unlock_irqrestore(i2s->lock, flags);
 
        samsung_asoc_init_dma_data(dai, &i2s->dma_playback, &i2s->dma_capture);
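
The conversion above replaces plain spin_lock()/spin_unlock() around each I2SMOD read-modify-write with the irqsave variants, which additionally disable local interrupts and remember the previous interrupt state in flags; that is the usual fix when the same lock can also be taken from interrupt context, although this hunk alone does not show the other acquisition sites. A generic sketch of the pattern, using a hypothetical device structure that is not part of the patch:

    #include <linux/io.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    /* Hypothetical device: one mode register guarded by a lock that may
     * also be taken from IRQ context. */
    struct hyp_dev {
            spinlock_t lock;
            void __iomem *mod_reg;
    };

    static void hyp_update_mode(struct hyp_dev *dev, u32 mask, u32 val)
    {
            unsigned long flags;
            u32 mod;

            spin_lock_irqsave(&dev->lock, flags);   /* saves and disables IRQs */
            mod = readl(dev->mod_reg);
            mod = (mod & ~mask) | val;              /* read-modify-write under the lock */
            writel(mod, dev->mod_reg);
            spin_unlock_irqrestore(&dev->lock, flags);
    }
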
 
index 0d37079879002d472c538b69ccc3321c0d7772ad..581175a51ecf730bb8c6a2271d137f049c2a445d 100644 (file)
@@ -3573,7 +3573,7 @@ static int snd_soc_dapm_dai_link_get(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_dapm_widget *w = snd_kcontrol_chip(kcontrol);
 
-       ucontrol->value.integer.value[0] = w->params_select;
+       ucontrol->value.enumerated.item[0] = w->params_select;
 
        return 0;
 }
@@ -3587,13 +3587,13 @@ static int snd_soc_dapm_dai_link_put(struct snd_kcontrol *kcontrol,
        if (w->power)
                return -EBUSY;
 
-       if (ucontrol->value.integer.value[0] == w->params_select)
+       if (ucontrol->value.enumerated.item[0] == w->params_select)
                return 0;
 
-       if (ucontrol->value.integer.value[0] >= w->num_params)
+       if (ucontrol->value.enumerated.item[0] >= w->num_params)
                return -EINVAL;
 
-       w->params_select = ucontrol->value.integer.value[0];
+       w->params_select = ucontrol->value.enumerated.item[0];
 
        return 0;
 }
index 4a96473b180f938b038d5e698f3578876eb7c70c..ee566e8bd1cff56efde9200bf4a76caa840669f1 100644 (file)
@@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_i_c)
 
-$(OUTPUT)%.i: %.S FORCE
+$(OUTPUT)%.s: %.S FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_i_c)
 
index 02db3cdff20ff7653c8ca0db21d28ef7b508a9eb..6b7707270aa3b19791c8b6248f90ecddeabc1fdd 100644 (file)
@@ -27,7 +27,7 @@ endef
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS ?=                       \
+FEATURE_TESTS_BASIC :=                 \
        backtrace                       \
        dwarf                           \
        fortify-source                  \
@@ -46,6 +46,7 @@ FEATURE_TESTS ?=                      \
        libpython                       \
        libpython-version               \
        libslang                        \
+       libcrypto                       \
        libunwind                       \
        pthread-attr-setaffinity-np     \
        stackprotector-all              \
@@ -56,6 +57,25 @@ FEATURE_TESTS ?=                     \
        get_cpuid                       \
        bpf
 
+# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+# of all feature tests
+FEATURE_TESTS_EXTRA :=                 \
+       bionic                          \
+       compile-32                      \
+       compile-x32                     \
+       cplus-demangle                  \
+       hello                           \
+       libbabeltrace                   \
+       liberty                         \
+       liberty-z                       \
+       libunwind-debug-frame
+
+FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
+
+ifeq ($(FEATURE_TESTS),all)
+  FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
+endif
+
 FEATURE_DISPLAY ?=                     \
        dwarf                           \
        glibc                           \
@@ -68,6 +88,7 @@ FEATURE_DISPLAY ?=                    \
        libperl                         \
        libpython                       \
        libslang                        \
+       libcrypto                       \
        libunwind                       \
        libdw-dwarf-unwind              \
        zlib                            \
@@ -100,6 +121,14 @@ ifeq ($(feature-all), 1)
   # test-all.c passed - just set all the core feature flags to 1:
   #
   $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+  #
+  # test-all.c does not include these tests, so we need to check
+  # them individually here to get proper values for their features
+  #
+  $(call feature_check,compile-32)
+  $(call feature_check,compile-x32)
+  $(call feature_check,bionic)
+  $(call feature_check,libbabeltrace)
 else
   $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
 endif
index bf8f0352264dcc8a5a1286bc4add2ca9de481357..c5f4c417428d7099fbe4f487a179b0663f478611 100644 (file)
@@ -23,6 +23,7 @@ FILES=                                        \
        test-libpython.bin              \
        test-libpython-version.bin      \
        test-libslang.bin               \
+       test-libcrypto.bin              \
        test-libunwind.bin              \
        test-libunwind-debug-frame.bin  \
        test-pthread-attr-setaffinity-np.bin    \
@@ -105,6 +106,9 @@ $(OUTPUT)test-libaudit.bin:
 $(OUTPUT)test-libslang.bin:
        $(BUILD) -I/usr/include/slang -lslang
 
+$(OUTPUT)test-libcrypto.bin:
+       $(BUILD) -lcrypto
+
 $(OUTPUT)test-gtk2.bin:
        $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
 
index 81025cade45fa9a3fcc6f8f0aacc8f403e6d9211..e499a36c1e4a9e21e9c355309b53a7dc5901664a 100644 (file)
 # include "test-bpf.c"
 #undef main
 
+#define main main_test_libcrypto
+# include "test-libcrypto.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
        main_test_libpython();
@@ -158,6 +162,7 @@ int main(int argc, char *argv[])
        main_test_lzma();
        main_test_get_cpuid();
        main_test_bpf();
+       main_test_libcrypto();
 
        return 0;
 }
index 31dbf45bf99c5996ca0ee62cba186d912b08e666..c54e6551ae4c54cd6e9f418e6e8ff97280444efd 100644 (file)
@@ -1,4 +1,6 @@
+#include <stdio.h>
 int main(void)
 {
+       printf("Hello World!\n");
        return 0;
 }
diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
new file mode 100644 (file)
index 0000000..bd79dc7
--- /dev/null
@@ -0,0 +1,17 @@
+#include <openssl/sha.h>
+#include <openssl/md5.h>
+
+int main(void)
+{
+       MD5_CTX context;
+       unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+       unsigned char dat[] = "12345";
+
+       MD5_Init(&context);
+       MD5_Update(&context, &dat[0], sizeof(dat));
+       MD5_Final(&md[0], &context);
+
+       SHA1(&dat[0], sizeof(dat), &md[0]);
+
+       return 0;
+}
index e8b8a23b9bf4cdac8ccfcf95690f8dfd93e1d6e6..954c644f7ad9e5ee0f54bdf16cf2b68766f1e2a5 100644 (file)
@@ -1,3 +1,4 @@
 libapi-y += fd/
 libapi-y += fs/
 libapi-y += cpu.o
+libapi-y += debug.o
index d85904dc9b38747dc0feda60f819cf93ceabbdcf..bbc82c614bee62eec8563c5b1dd9fd222eafa5de 100644 (file)
@@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
 CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
 
 RM = rm -f
 
diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644 (file)
index 0000000..188f788
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...)   \
+do {                           \
+       if ((func))             \
+               (func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warning;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warning(fmt, ...)   __pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...)      __pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...)     __pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */
diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644 (file)
index 0000000..5fa5cf5
--- /dev/null
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+       va_list args;
+       int err;
+
+       va_start(args, format);
+       err = vfprintf(stderr, format, args);
+       va_end(args);
+       return err;
+}
+
+libapi_print_fn_t __pr_warning = __base_pr;
+libapi_print_fn_t __pr_info    = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+                     libapi_print_fn_t info,
+                     libapi_print_fn_t debug)
+{
+       __pr_warning = warn;
+       __pr_info    = info;
+       __pr_debug   = debug;
+}
diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644 (file)
index 0000000..a0872f6
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+                     libapi_print_fn_t info,
+                     libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */
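
debug.c and debug.h above give libapi a pluggable logging hook: by default warnings and info go to stderr and debug output is disabled (the NULL __pr_debug), and a consumer can redirect all three streams with libapi_set_print(). A sketch of how a tool linking against libapi might hook this up; the include path and the tool_print() helper are assumptions for illustration, not part of the patch:

    #include <stdarg.h>
    #include <stdio.h>
    #include "debug.h"              /* the new tools/lib/api/debug.h */

    /* Forward libapi messages into the tool's own logging (here just stderr). */
    static int tool_print(const char *fmt, ...)
    {
            va_list ap;
            int ret;

            va_start(ap, fmt);
            ret = vfprintf(stderr, fmt, ap);
            va_end(ap);
            return ret;
    }

    int main(void)
    {
            /* warnings and info via tool_print(); debug output stays disabled */
            libapi_set_print(tool_print, tool_print, NULL);
            return 0;
    }
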
index 459599d1b6c410b7b41333c13f248ee685c2dd4a..ef78c22ff44d4142f01a1f5aea3bbe4e15fd0101 100644 (file)
@@ -13,6 +13,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "debug-internal.h"
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
@@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value)
        return err;
 }
 
+#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+       size_t size = 0, alloc_size = 0;
+       void *bf = NULL, *nbf;
+       int fd, n, err = 0;
+       char sbuf[STRERR_BUFSIZE];
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+               return -errno;
+
+       do {
+               if (size == alloc_size) {
+                       alloc_size += BUFSIZ;
+                       nbf = realloc(bf, alloc_size);
+                       if (!nbf) {
+                               err = -ENOMEM;
+                               break;
+                       }
+
+                       bf = nbf;
+               }
+
+               n = read(fd, bf + size, alloc_size - size);
+               if (n < 0) {
+                       if (size) {
+                               pr_warning("read failed %d: %s\n", errno,
+                                        strerror_r(errno, sbuf, sizeof(sbuf)));
+                               err = 0;
+                       } else
+                               err = -errno;
+
+                       break;
+               }
+
+               size += n;
+       } while (n > 0);
+
+       if (!err) {
+               *sizep = size;
+               *buf   = bf;
+       } else
+               free(bf);
+
+       close(fd);
+       return err;
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
        char path[PATH_MAX];
@@ -326,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value)
        return filename__read_int(path, value);
 }
 
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+       char path[PATH_MAX];
+       const char *sysfs = sysfs__mountpoint();
+
+       if (!sysfs)
+               return -1;
+
+       snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+       return filename__read_str(path, buf, sizep);
+}
+
 int sysctl__read_int(const char *sysctl, int *value)
 {
        char path[PATH_MAX];
index d024a7f682f69b27a4bba1aed7b8f1894d07b50f..9f6598098dc5804a5bf660013ad5b07c499b6169 100644 (file)
@@ -2,6 +2,7 @@
 #define __API_FS__
 
 #include <stdbool.h>
+#include <unistd.h>
 
 /*
  * On most systems <limits.h> would have given us this, but  not on some systems
@@ -26,8 +27,10 @@ FS(tracefs)
 
 int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
 #endif /* __API_FS__ */
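
filename__read_str() above slurps a whole file into a growing malloc'ed buffer, and sysfs__read_str() first resolves the path relative to the sysfs mount point. A small usage sketch; the attribute path is only an example, and note that the returned buffer is not NUL-terminated, so the reported length must be honoured:

    #include <stdio.h>
    #include <stdlib.h>
    #include <api/fs/fs.h>          /* include path assumed from the tools/ build */

    int main(void)
    {
            char *buf = NULL;
            size_t len = 0;

            /* reads <sysfs>/devices/system/cpu/possible into buf, len bytes */
            if (sysfs__read_str("devices/system/cpu/possible", &buf, &len) < 0) {
                    fprintf(stderr, "sysfs__read_str failed\n");
                    return 1;
            }

            printf("%.*s", (int)len, buf);  /* buf is not NUL-terminated */
            free(buf);
            return 0;
    }
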
index 8334a5a9d5d7f55e97144e1d7462c52deec02e02..7e543c3102d4118a09717bb6fb4c0f779532ae91 100644 (file)
@@ -201,6 +201,7 @@ struct bpf_object {
                        Elf_Data *data;
                } *reloc;
                int nr_reloc;
+               int maps_shndx;
        } efile;
        /*
         * All loaded bpf_object is linked in a list, which is
@@ -350,6 +351,7 @@ static struct bpf_object *bpf_object__new(const char *path,
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
+       obj->efile.maps_shndx = -1;
 
        obj->loaded = false;
 
@@ -529,12 +531,12 @@ bpf_object__init_maps(struct bpf_object *obj, void *data,
 }
 
 static int
-bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
+bpf_object__init_maps_name(struct bpf_object *obj)
 {
        int i;
        Elf_Data *symbols = obj->efile.symbols;
 
-       if (!symbols || maps_shndx < 0)
+       if (!symbols || obj->efile.maps_shndx < 0)
                return -EINVAL;
 
        for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
@@ -544,7 +546,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx)
 
                if (!gelf_getsym(symbols, i, &sym))
                        continue;
-               if (sym.st_shndx != maps_shndx)
+               if (sym.st_shndx != obj->efile.maps_shndx)
                        continue;
 
                map_name = elf_strptr(obj->efile.elf,
@@ -572,7 +574,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
        Elf *elf = obj->efile.elf;
        GElf_Ehdr *ep = &obj->efile.ehdr;
        Elf_Scn *scn = NULL;
-       int idx = 0, err = 0, maps_shndx = -1;
+       int idx = 0, err = 0;
 
        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
@@ -625,7 +627,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                else if (strcmp(name, "maps") == 0) {
                        err = bpf_object__init_maps(obj, data->d_buf,
                                                    data->d_size);
-                       maps_shndx = idx;
+                       obj->efile.maps_shndx = idx;
                } else if (sh.sh_type == SHT_SYMTAB) {
                        if (obj->efile.symbols) {
                                pr_warning("bpf: multiple SYMTAB in %s\n",
@@ -674,8 +676,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
                pr_warning("Corrupted ELF file: index of strtab invalid\n");
                return LIBBPF_ERRNO__FORMAT;
        }
-       if (maps_shndx >= 0)
-               err = bpf_object__init_maps_name(obj, maps_shndx);
+       if (obj->efile.maps_shndx >= 0)
+               err = bpf_object__init_maps_name(obj);
 out:
        return err;
 }
@@ -697,7 +699,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 static int
 bpf_program__collect_reloc(struct bpf_program *prog,
                           size_t nr_maps, GElf_Shdr *shdr,
-                          Elf_Data *data, Elf_Data *symbols)
+                          Elf_Data *data, Elf_Data *symbols,
+                          int maps_shndx)
 {
        int i, nrels;
 
@@ -724,9 +727,6 @@ bpf_program__collect_reloc(struct bpf_program *prog,
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
-               insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-               pr_debug("relocation: insn_idx=%u\n", insn_idx);
-
                if (!gelf_getsym(symbols,
                                 GELF_R_SYM(rel.r_info),
                                 &sym)) {
@@ -735,6 +735,15 @@ bpf_program__collect_reloc(struct bpf_program *prog,
                        return -LIBBPF_ERRNO__FORMAT;
                }
 
+               if (sym.st_shndx != maps_shndx) {
+                       pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
+                                  prog->section_name, sym.st_shndx);
+                       return -LIBBPF_ERRNO__RELOC;
+               }
+
+               insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+               pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
                if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
                        pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
                                   insn_idx, insns[insn_idx].code);
@@ -863,7 +872,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 
                err = bpf_program__collect_reloc(prog, nr_maps,
                                                 shdr, data,
-                                                obj->efile.symbols);
+                                                obj->efile.symbols,
+                                                obj->efile.maps_shndx);
                if (err)
                        return err;
        }
index 90d2baeb621a5265f703ecc0e3bc0cbde60ac1f4..1d57af56814b8671c10d9619202f042ad5c031d4 100644 (file)
@@ -100,7 +100,7 @@ include $(srctree)/tools/build/Makefile.include
 
 do_compile_shared_library =                    \
        ($(print_shared_lib_compile)            \
-       $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so))
+       $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -sf $@ liblockdep.so))
 
 do_build_static_lib =                          \
        ($(print_static_lib_build)              \
index 9be663340f0a4c48e9ec51c996b13c3a8b596fdc..d1c89cc06f5f6710471c42dc5b2a475a54ee7658 100644 (file)
@@ -11,11 +11,6 @@ static __thread struct task_struct current_obj;
 bool debug_locks = true;
 bool debug_locks_silent;
 
-__attribute__((constructor)) static void liblockdep_init(void)
-{
-       lockdep_init();
-}
-
 __attribute__((destructor)) static void liblockdep_exit(void)
 {
        debug_check_no_locks_held();
index a60c14b9662aefd6f17b47dec534bce2f53d9f03..6e66277ec4375c84693b25756fdf8336faf79fcf 100644 (file)
@@ -44,7 +44,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 void lock_release(struct lockdep_map *lock, int nested,
                        unsigned long ip);
 extern void debug_check_no_locks_freed(const void *from, unsigned long len);
-extern void lockdep_init(void);
 
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
        { .name = (_name), .key = (void *)(_key), }
index f42b7e9aa48f2661fb94fced614db8f443eca9c4..a0a2e3a266af8122fed3d66b3f5a537ab3c1c773 100644 (file)
@@ -1,2 +1,8 @@
 #include <linux/lockdep.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu       hlist_for_each_entry
+#define hlist_add_head_rcu             hlist_add_head
+#define hlist_del_rcu                  hlist_del
+
 #include "../../../kernel/locking/lockdep.c"
index 21cdf869a01b8b7facc084c4dc66cf73afa163bb..52844847569c99b878c031da8f25a13d1c0b5838 100644 (file)
@@ -439,7 +439,5 @@ __attribute__((constructor)) static void init_preload(void)
        ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
 #endif
 
-       lockdep_init();
-
        __init_state = done;
 }
index 0f782ff404ac65999c8689555967eabb9c6f0e52..18211a5f354fe53e2a0650fe1ca9150956fd5b7a 100644 (file)
@@ -1,13 +1,13 @@
 #include <liblockdep/mutex.h>
 
-void main(void)
+int main(void)
 {
-       pthread_mutex_t a, b;
+       pthread_mutex_t a;
 
        pthread_mutex_init(&a, NULL);
-       pthread_mutex_init(&b, NULL);
 
        pthread_mutex_lock(&a);
-       pthread_mutex_lock(&b);
        pthread_mutex_lock(&a);
+
+       return 0;
 }
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
new file mode 100644 (file)
index 0000000..0f782ff
--- /dev/null
@@ -0,0 +1,13 @@
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+       pthread_mutex_t a, b;
+
+       pthread_mutex_init(&a, NULL);
+       pthread_mutex_init(&b, NULL);
+
+       pthread_mutex_lock(&a);
+       pthread_mutex_lock(&b);
+       pthread_mutex_lock(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
new file mode 100644 (file)
index 0000000..cd807d7
--- /dev/null
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <pthread.h>
+
+pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
+pthread_barrier_t bar;
+
+void *ba_lock(void *arg)
+{
+       int ret, i;
+
+       pthread_mutex_lock(&b);
+
+       if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+               pthread_barrier_destroy(&bar);
+
+       pthread_mutex_lock(&a);
+
+       pthread_mutex_unlock(&a);
+       pthread_mutex_unlock(&b);
+}
+
+int main(void)
+{
+       pthread_t t;
+
+       pthread_barrier_init(&bar, NULL, 2);
+
+       if (pthread_create(&t, NULL, ba_lock, NULL)) {
+               fprintf(stderr, "pthread_create() failed\n");
+               return 1;
+       }
+       pthread_mutex_lock(&a);
+
+       if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+               pthread_barrier_destroy(&bar);
+
+       pthread_mutex_lock(&b);
+
+       pthread_mutex_unlock(&b);
+       pthread_mutex_unlock(&a);
+
+       pthread_join(t, NULL);
+
+       return 0;
+}
index 6386dc3182a0985a40551d0af980e53102e04c2a..fd3e56a83fc27025a9016cae2eaf1882c751cc01 100644 (file)
@@ -3,6 +3,7 @@
 
 #define __used         __attribute__((__unused__))
 #define unlikely
+#define READ_ONCE(x) (x)
 #define WRITE_ONCE(x, val) x=(val)
 #define RCU_INIT_POINTER(p, v) p=(v)
 
index c3bd294a63d1f7e4a75b2a950bbe398f33703cd6..190cc886ab9105ea6be58ad58caf8e940274c4a9 100644 (file)
@@ -1951,6 +1951,7 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok)
                   strcmp(token, "*") == 0 ||
                   strcmp(token, "^") == 0 ||
                   strcmp(token, "/") == 0 ||
+                  strcmp(token, "%") == 0 ||
                   strcmp(token, "<") == 0 ||
                   strcmp(token, ">") == 0 ||
                   strcmp(token, "<=") == 0 ||
@@ -2397,6 +2398,12 @@ static int arg_num_eval(struct print_arg *arg, long long *val)
                                break;
                        *val = left + right;
                        break;
+               case '~':
+                       ret = arg_num_eval(arg->op.right, &right);
+                       if (!ret)
+                               break;
+                       *val = ~right;
+                       break;
                default:
                        do_warning("unknown op '%s'", arg->op.op);
                        ret = 0;
@@ -2634,6 +2641,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_field:
        free_arg(arg->hex.field);
+       arg->hex.field = NULL;
 out:
        *tok = NULL;
        return EVENT_ERROR;
@@ -2658,8 +2666,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
 
 free_size:
        free_arg(arg->int_array.count);
+       arg->int_array.count = NULL;
 free_field:
        free_arg(arg->int_array.field);
+       arg->int_array.field = NULL;
 out:
        *tok = NULL;
        return EVENT_ERROR;
@@ -3689,6 +3699,9 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg
                case '/':
                        val = left / right;
                        break;
+               case '%':
+                       val = left % right;
+                       break;
                case '*':
                        val = left * right;
                        break;
@@ -4971,7 +4984,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event
                                                break;
                                        }
                                }
-                               if (pevent->long_size == 8 && ls &&
+                               if (pevent->long_size == 8 && ls == 1 &&
                                    sizeof(long) != 8) {
                                        char *p;
 
@@ -5335,41 +5348,45 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
        return false;
 }
 
-void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
-                       struct pevent_record *record, bool use_trace_clock)
+/**
+ * pevent_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if none
+ * is found.
+ */
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record)
 {
-       static const char *spaces = "                    "; /* 20 spaces */
-       struct event_format *event;
-       unsigned long secs;
-       unsigned long usecs;
-       unsigned long nsecs;
-       const char *comm;
-       void *data = record->data;
        int type;
-       int pid;
-       int len;
-       int p;
-       bool use_usec_format;
-
-       use_usec_format = is_timestamp_in_us(pevent->trace_clock,
-                                                       use_trace_clock);
-       if (use_usec_format) {
-               secs = record->ts / NSECS_PER_SEC;
-               nsecs = record->ts - secs * NSECS_PER_SEC;
-       }
 
        if (record->size < 0) {
                do_warning("ug! negative record size %d", record->size);
-               return;
+               return NULL;
        }
 
-       type = trace_parse_common_type(pevent, data);
+       type = trace_parse_common_type(pevent, record->data);
 
-       event = pevent_find_event(pevent, type);
-       if (!event) {
-               do_warning("ug! no event found for type %d", type);
-               return;
-       }
+       return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the task's comm, pid and CPU to @s.
+ */
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record)
+{
+       void *data = record->data;
+       const char *comm;
+       int pid;
 
        pid = parse_common_pid(pevent, data);
        comm = find_cmdline(pevent, pid);
@@ -5377,9 +5394,43 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
        if (pevent->latency_format) {
                trace_seq_printf(s, "%8.8s-%-5d %3d",
                       comm, pid, record->cpu);
-               pevent_data_lat_fmt(pevent, s, record);
        } else
                trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * pevent_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to parse according to the @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record,
+                            bool use_trace_clock)
+{
+       unsigned long secs;
+       unsigned long usecs;
+       unsigned long nsecs;
+       int p;
+       bool use_usec_format;
+
+       use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+                                                       use_trace_clock);
+       if (use_usec_format) {
+               secs = record->ts / NSECS_PER_SEC;
+               nsecs = record->ts - secs * NSECS_PER_SEC;
+       }
+
+       if (pevent->latency_format) {
+               trace_seq_printf(s, " %3d", record->cpu);
+               pevent_data_lat_fmt(pevent, s, record);
+       } else
+               trace_seq_printf(s, " [%03d]", record->cpu);
 
        if (use_usec_format) {
                if (pevent->flags & PEVENT_NSEC_OUTPUT) {
@@ -5387,14 +5438,36 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
                        p = 9;
                } else {
                        usecs = (nsecs + 500) / NSECS_PER_USEC;
+                       /* To avoid usecs larger than 1 sec */
+                       if (usecs >= 1000000) {
+                               usecs -= 1000000;
+                               secs++;
+                       }
                        p = 6;
                }
 
-               trace_seq_printf(s, " %5lu.%0*lu: %s: ",
-                                       secs, p, usecs, event->name);
+               trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
        } else
-               trace_seq_printf(s, " %12llu: %s: ",
-                                       record->ts, event->name);
+               trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * pevent_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record)
+{
+       static const char *spaces = "                    "; /* 20 spaces */
+       int len;
+
+       trace_seq_printf(s, " %s: ", event->name);
 
        /* Space out the event names evenly. */
        len = strlen(event->name);
@@ -5404,6 +5477,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
        pevent_event_info(s, event, record);
 }
 
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+                       struct pevent_record *record, bool use_trace_clock)
+{
+       struct event_format *event;
+
+       event = pevent_find_event_by_record(pevent, record);
+       if (!event) {
+               do_warning("ug! no event found for type %d",
+                          trace_parse_common_type(pevent, record->data));
+               return;
+       }
+
+       pevent_print_event_task(pevent, s, event, record);
+       pevent_print_event_time(pevent, s, event, record, use_trace_clock);
+       pevent_print_event_data(pevent, s, event, record);
+}
+
 static int events_id_cmp(const void *a, const void *b)
 {
        struct event_format * const * ea = a;
index 706d9bc24066cf308ba17d718eafd95c4ff5550c..9ffde377e89d912ffb2127fe454872346759738b 100644 (file)
@@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt,
                                 unsigned long long addr);
 int pevent_pid_is_registered(struct pevent *pevent, int pid);
 
+void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record);
+void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record,
+                            bool use_trace_clock);
+void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s,
+                            struct event_format *event,
+                            struct pevent_record *record);
 void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
                        struct pevent_record *record, bool use_trace_clock);
 
@@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id);
 struct event_format *
 pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
 
+struct event_format *
+pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record);
+
 void pevent_data_lat_fmt(struct pevent *pevent,
                         struct trace_seq *s, struct pevent_record *record);
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
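
With the split above, pevent_print_event() becomes a thin wrapper around three helpers, so a caller that wants a different field order, or only some of the fields, can call them individually. A sketch built only from the functions declared above, mirroring the sequence the wrapper now performs:

    #include "event-parse.h"

    /* Print task info, then timestamp, then the parsed payload. */
    static void print_record(struct pevent *pevent, struct trace_seq *s,
                             struct pevent_record *record, bool use_trace_clock)
    {
            struct event_format *event;

            event = pevent_find_event_by_record(pevent, record);
            if (!event)
                    return;         /* no event known for this record's type */

            pevent_print_event_task(pevent, s, event, record);
            pevent_print_event_time(pevent, s, event, record, use_trace_clock);
            pevent_print_event_data(pevent, s, event, record);
    }
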
index b9ca1e304158da86f36dafad33e7de0a52e01567..15949e2a7805cc6e7d848ddbf2f99b249e332f3b 100644 (file)
@@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file.
 SYNOPSIS
 --------
 [verse]
-'perf config' -l | --list
+'perf config' [<file-option>] -l | --list
 
 DESCRIPTION
 -----------
@@ -21,6 +21,14 @@ OPTIONS
 --list::
        Show current config variables, name and value, for all sections.
 
+--user::
+       For writing and reading options: write to the user
+       '$HOME/.perfconfig' file or read from it.
+
+--system::
+       For writing and reading options: write to the system-wide
+       '$(sysconfdir)/perfconfig' file or read from it.
+
 CONFIGURATION FILE
 ------------------
 
@@ -30,6 +38,10 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration.
 The file '$(sysconfdir)/perfconfig' can be used to
 store a system-wide default configuration.
 
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
 Syntax
 ~~~~~~
 
@@ -62,7 +74,7 @@ Given a $HOME/.perfconfig like this:
                medium = green, default
                normal = lightgray, default
                selected = white, lightgray
-               code = blue, default
+               jump_arrows = blue, default
                addr = magenta, default
                root = white, blue
 
@@ -98,6 +110,347 @@ Given a $HOME/.perfconfig like this:
                order = caller
                sort-key = function
 
+Variables
+~~~~~~~~~
+
+colors.*::
+       These variables customize the colors used in the output of 'report',
+       'top' and 'annotate' in the TUI. They should specify the
+       foreground and background colors, separated by a comma, for example:
+
+               medium = green, lightgray
+
+       If you want to use the color configured for your terminal, just leave it
+       as 'default', for example:
+
+               medium = default, lightgray
+
+       Available colors:
+       red, yellow, green, cyan, gray, black, blue,
+       white, default, magenta, lightgray
+
+       colors.top::
+               'top' means an overhead percentage of more than 5%.
+               The values of this variable specify the colors used for such
+               percentages. The defaults are foreground-color 'red' and
+               background-color 'default'.
+       colors.medium::
+               'medium' means an overhead percentage of more than 0.5%.
+               Default values are 'green' and 'default'.
+       colors.normal::
+               'normal' means the remaining overhead percentages, i.e. those
+               not covered by 'top', 'medium' or 'selected'.
+               Default values are 'lightgray' and 'default'.
+       colors.selected::
+               This selects the colors for the current entry in a list of entries
+               from sub-commands (top, report, annotate).
+               Default values are 'black' and 'lightgray'.
+       colors.jump_arrows::
+               Colors for jump arrows on assembly code listings
+               such as 'jns', 'jmp', 'jne', etc.
+               Default values are 'blue', 'default'.
+       colors.addr::
+               This selects colors for addresses from 'annotate'.
+               Default values are 'magenta', 'default'.
+       colors.root::
+               Colors for headers in the output of sub-commands (top, report).
+               Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+       Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+       These values are booleans, for example:
+
+       [tui]
+               top = true
+
+       will make the TUI the default for the 'top' subcommand. These settings
+       only take effect if the required libraries were detected at tool build time.
+
+buildid.*::
+       buildid.dir::
+               Each executable and shared library in modern distributions comes with a
+               content-based identifier that, if available, is inserted into the
+               'perf.data' file header so that, at analysis time, the tools can find
+               what is needed for symbol resolution, code annotation, etc.
+
+               The recording tools also store a hard link or copy of the binaries,
+               shared libraries, /proc/kallsyms and /proc/kcore files in a per-user
+               directory, $HOME/.debug/, to be used at analysis time.
+
+               The buildid.dir variable can be used to either change this directory
+               cache location, or to disable it altogether. If you want to disable it,
+               set buildid.dir to /dev/null. The default is $HOME/.debug.
+
+annotate.*::
+       These options only apply to the TUI.
+       They control how addresses, jump arrows and source code are shown
+       alongside the assembly listing of a program.
+
+       annotate.hide_src_code::
+               If the analyzed program has source code available, this option
+               controls whether 'annotate' prints the assembly listing with or
+               without that source code. For example, take the following four
+               lines of a program. If this option is 'true', they are printed
+               without the source code, as below.
+
+               │        push   %rbp
+               │        mov    %rsp,%rbp
+               │        sub    $0x10,%rsp
+               │        mov    (%rdi),%rdx
+
+               But if this option is 'false', the corresponding source code is
+               also printed, as below. Default is 'false'.
+
+               │      struct rb_node *rb_next(const struct rb_node *node)
+               │      {
+               │        push   %rbp
+               │        mov    %rsp,%rbp
+               │        sub    $0x10,%rsp
+               │              struct rb_node *parent;
+               │
+               │              if (RB_EMPTY_NODE(node))
+               │        mov    (%rdi),%rdx
+               │              return n;
+
+        annotate.use_offset::
+               When this option is enabled, addresses are shown as offsets from
+               the first address of the loaded function instead of as absolute
+               addresses. For example, if the base address is 0xffffffff81624d50,
+               as below,
+
+               ffffffff81624d50 <load0>
+
+               an instruction normally shows its absolute address, as below,
+
+               ffffffff816250b8:│  mov    0x8(%r14),%rdi
+
+               but if use_offset is 'true', the offset from the base address is
+               printed instead. Default is 'true'. This option only applies to the TUI.
+
+                            368:│  mov    0x8(%r14),%rdi
+
+       annotate.jump_arrows::
+               Assembly code may contain jump instructions.
+               Depending on the boolean value of jump_arrows, arrows showing
+               where each jump instruction goes are printed or not, as below.
+
+               │     ┌──jmp    1333
+               │     │  xchg   %ax,%ax
+               │1330:│  mov    %r15,%r10
+               │1333:└─→cmp    %r15,%r14
+
+               If jump_arrows is 'false', the arrows are not printed, as below.
+               Default is 'false'.
+
+               │      ↓ jmp    1333
+               │        xchg   %ax,%ax
+               │1330:   mov    %r15,%r10
+               │1333:   cmp    %r15,%r14
+
+        annotate.show_linenr::
+               When showing source code, if this option is 'true',
+               line numbers are printed, as below.
+
+               │1628         if (type & PERF_SAMPLE_IDENTIFIER) {
+               │     ↓ jne    508
+               │1628                 data->id = *array;
+               │1629                 array++;
+               │1630         }
+
+               However, if this option is 'false', they are not printed, as below.
+               Default is 'false'.
+
+               │             if (type & PERF_SAMPLE_IDENTIFIER) {
+               │     ↓ jne    508
+               │                     data->id = *array;
+               │                     array++;
+               │             }
+
+        annotate.show_nr_jumps::
+               Consider the following line of assembly code.
+
+               │1382:   movb   $0x1,-0x270(%rbp)
+
+               If this option is enabled, the number of branches jumping to that
+               address is printed in front of it, as below. Default is 'false'.
+
+               │1 1382:   movb   $0x1,-0x270(%rbp)
+
+        annotate.show_total_period::
+               To compare two records at the instruction level, this option
+               displays the total number of samples that belong to a line of
+               assembly code. If this option is 'true', total periods are printed
+               instead of percent values, as below.
+
+                 302 │      mov    %eax,%eax
+
+               But if this option is 'false', overhead is printed as percent
+               values instead. Default is 'false'.
+
+               99.93 │      mov    %eax,%eax
+
+hist.*::
+       hist.percentage::
+               This option controls how the overhead of filtered entries is
+               calculated - it only has an effect when there is a filter (by comm,
+               dso or symbol name). Consider the following example:
+
+                      Overhead  Symbols
+                      ........  .......
+                       33.33%     foo
+                       33.33%     bar
+                       33.33%     baz
+
+              This is the original overhead; now filter out the first entry,
+              'foo'. The value of 'relative' would increase the overhead of 'bar'
+              and 'baz' to 50.00% for each, while 'absolute' would show their
+              current overhead (33.33%).
+
+ui.*::
+       ui.show-headers::
+               This option controls display of column headers (like 'Overhead' and 'Symbol')
+               in 'report' and 'top'. If this option is false, they are hidden.
+               This option only applies to the TUI.
+
+call-graph.*::
+       When the sub-commands 'top' and 'report' are used with -g/--children,
+       these options control call-graph behaviour.
+
+       call-graph.record-mode::
+               The record-mode can be 'fp' (frame pointer), 'dwarf' or 'lbr'.
+               The value 'dwarf' is effective only if perf detects the needed
+               library (libunwind or a recent version of libdw).
+               'lbr' only works on CPUs that support it.
+
+       call-graph.dump-size::
+               The size of stack to dump for post-unwinding. Default is 8192 (bytes).
+               When record-mode is 'dwarf', this default size is used if omitted.
+
+       call-graph.print-type::
+               The print-type can be 'graph' (absolute), 'fractal' (relative),
+               'flat' or 'folded'. This option controls how overhead is shown for
+               each callchain entry. Consider the following example.
+
+                Overhead  Symbols
+                ........  .......
+                  40.00%  foo
+                          |
+                          ---foo
+                             |
+                             |--50.00%--bar
+                             |          main
+                             |
+                              --50.00%--baz
+                                        main
+
+               This output is in the 'fractal' format. 'foo' came from 'bar' and
+               'baz' exactly half and half, so 'fractal' shows 50.00% for each
+               (meaning it treats the overhead of 'foo' as the 100% total).
+
+               The 'graph' format uses the absolute overhead value of 'foo' as the
+               total, so each of the 'bar' and 'baz' callchains has 20.00% overhead.
+               'flat' shows call chains in a single, linear column.
+               'folded' means call chains are displayed on one line, separated by
+               semicolons.
+
+       call-graph.order::
+               This option controls the print order of callchains. The default is
+               'callee', which means the callee is printed at the top, followed by
+               its caller, and so on. 'caller' prints them in reverse order.
+
+               If this option is not set and report.children or top.children is
+               set to true (or the equivalent command line option is given),
+               the default value of this option is changed to 'caller' for the
+               execution of 'perf report' or 'perf top'. Other commands will
+               still default to 'callee'.
+
+       call-graph.sort-key::
+               Callchains are merged if they contain the same information.
+               The sort-key option determines how callchains are compared.
+               A value of 'sort-key' can be 'function' or 'address'.
+               The default is 'function'.
+
+       call-graph.threshold::
+               When there are many callchains, printing them all would produce a
+               huge number of lines, so perf omits small callchains whose overhead
+               is below a certain threshold, which this option controls.
+               Default is 0.5 (%). How the overhead is calculated depends on
+               call-graph.print-type.
+
+       call-graph.print-limit::
+               This is the maximum number of callchain lines printed for a single
+               histogram entry. Default is 0, which means no limit.
+
+report.*::
+       report.percent-limit::
+               This one is mostly the same as call-graph.threshold but works for
+               histogram entries. Entries having an overhead lower than this
+               percentage will not be printed. Default is '0'. If percent-limit
+               is '10', only entries which have more than 10% of overhead will be
+               printed.
+
+       report.queue-size::
+               This option sets up the maximum allocation size of the internal
+               event queue for ordering events. Default is 0, meaning no limit.
+
+       report.children::
+               'Children' means functions called from another function.
+               If this option is true, 'perf report' accumulates the callchains of
+               children and shows the (accumulated) total overhead as well as the
+               'Self' overhead.
+               Please refer to the 'perf report' manual. The default is 'true'.
+
+       report.group::
+               This option shows event group information together.
+               Example output with this turned on; notice that there is one column
+               per event in the group, ref-cycles and cycles:
+
+               # group: {ref-cycles,cycles}
+               # ========
+               #
+               # Samples: 7K of event 'anon group { ref-cycles, cycles }'
+               # Event count (approx.): 6876107743
+               #
+               #         Overhead  Command      Shared Object               Symbol
+               # ................  .......  .................  ...................
+               #
+                   99.84%  99.76%  noploop  noploop            [.] main
+                    0.07%   0.00%  noploop  ld-2.15.so         [.] strcmp
+                    0.03%   0.00%  noploop  [kernel.kallsyms]  [k] timerqueue_del
+
+top.*::
+       top.children::
+               Same as 'report.children'. If it is enabled, the output of the 'top'
+               command will have a 'Children' overhead column as well as a 'Self'
+               overhead column by default.
+               The default is 'true'.
+
+man.*::
+       man.viewer::
+               This option selects the tool used to view manual pages when the 'help'
+               subcommand is invoked. Supported tools are 'man', 'woman'
+               (with emacs client) and 'konqueror'. Default is 'man'.
+
+               A new man viewer tool can also be added using 'man.<tool>.cmd',
+               or a different path can be set with the 'man.<tool>.path' config option.
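+
+               For example, a hypothetical viewer named 'myviewer' could be hooked
+               up like this (illustrative only, assuming git-style subsection
+               syntax for 'man.<tool>.cmd'):
+
+                       [man]
+                               viewer = myviewer
+                       [man "myviewer"]
+                               cmd = myviewer --page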
+
+pager.*::
+       pager.<subcommand>::
+               When the subcommand is run on stdio, this value determines whether
+               it uses a pager or not. Default is 'unspecified'.
+
+kmem.*::
+       kmem.default::
+               This option decides which allocator is to be analyzed if neither the
+               '--slab' nor the '--page' option is used. Default is 'slab'.
+
+record.*::
+       record.build-id::
+               This option can be 'cache', 'no-cache' or 'skip'.
+               'cache' post-processes the data and saves/updates the binaries in
+               the build-id cache (in ~/.debug). This is the default.
+               'no-cache' post-processes the data but does not update the build-id cache.
+               'skip' skips post-processing and does not update the cache.
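+
+               As a purely illustrative example, the kmem and record defaults
+               above could be overridden with:
+
+                       [kmem]
+                               default = page
+                       [record]
+                               build-id = no-cache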
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 0b1cedeef895369c9c60aa0a1e6cbb718c65c509..87b2588d1cbdee6060a304be4f79015cb5889171 100644 (file)
@@ -53,6 +53,13 @@ include::itrace.txt[]
 --strip::
        Use with --itrace to strip out non-synthesized events.
 
+-j::
+--jit::
+       Process jitdump files by injecting the mmap records corresponding to jitted
+       functions. This option also generates the ELF images for each jitted function
+       found in the jitdump files captured in the input perf.data file. Use this option
+       if you are monitoring an environment that uses JIT runtimes, such as Java, DART or V8.
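+
+       Assuming the monitored JIT runtime emitted jitdump files during the
+       original 'perf record' session, a typical (illustrative) follow-up is:
+
+               perf inject --jit -i perf.data -o perf.data.jitted
+               perf report -i perf.data.jitted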
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
index fbceb631387c31c44002080130b7ff6e282229d3..19aa17532a16709646dc52487c5f686675c2b658 100644 (file)
@@ -341,6 +341,12 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
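+
+For example (illustrative command line), to restrict sampling to user space:
+
+       perf record --all-user -e cycles ./my_workload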
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
index 8a301f6afb37ad855351fcd5fba1644d12dc2529..12113992ac9d0f5ceca0b003cec568717c4208e5 100644 (file)
@@ -117,6 +117,22 @@ OPTIONS
        And default sort keys are changed to comm, dso_from, symbol_from, dso_to
        and symbol_to, see '--branch-stack'.
 
+       If the --mem-mode option is used, the following sort keys are also available
+       (incompatible with --branch-stack):
+       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+       - symbol_daddr: name of data symbol being executed on at the time of the sample
+       - dso_daddr: name of library or module containing the data being executed
+       on at the time of the sample
+       - locked: whether the bus was locked at the time of the sample
+       - tlb: type of tlb access for the data at the time of the sample
+       - mem: type of memory access for the data at the time of the sample
+       - snoop: type of snoop (if any) for the data at the time of the sample
+       - dcacheline: the cacheline the data address is on at the time of the sample
+
+       And the default sort keys are changed to local_weight, mem, sym, dso,
+       symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
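+
+       For example (illustrative), data-related keys can be selected explicitly
+       with something like:
+
+               perf report --mem-mode --sort=mem,tlb,snoop,symbol_daddr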
+
        If the data file has tracepoint event(s), following (dynamic) sort keys
        are also available:
        trace, trace_fields, [<event>.]<field>[/raw]
@@ -151,22 +167,6 @@ OPTIONS
        By default, every sort keys not specified in -F will be appended
        automatically.
 
-       If --mem-mode option is used, following sort keys are also available
-       (incompatible with --branch-stack):
-       symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
-
-       - symbol_daddr: name of data symbol being executed on at the time of sample
-       - dso_daddr: name of library or module containing the data being executed
-       on at the time of sample
-       - locked: whether the bus was locked at the time of sample
-       - tlb: type of tlb access for the data at the time of sample
-       - mem: type of memory access for the data at the time of sample
-       - snoop: type of snoop (if any) for the data at the time of sample
-       - dcacheline: the cacheline the data address is on at the time of sample
-
-       And default sort keys are changed to local_weight, mem, sym, dso,
-       symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
-
 -p::
 --parent=<regex>::
         A regex filter to identify parent. The parent is a caller of this
@@ -351,7 +351,10 @@ OPTIONS
 
 --percent-limit::
        Do not show entries which have an overhead under that percent.
-       (Default: 0).
+       (Default: 0).  Note that this option also sets the percent limit (threshold)
+       of callchains.  However, the default value of the callchain threshold is
+       different from the default value for hist entries.  Please see the
+       --call-graph option for details.
 
 --percentage::
        Determine how to display the overhead percentage of filtered entries.
@@ -398,6 +401,9 @@ include::itrace.txt[]
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+       Enable hierarchical output.
+
 include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
index 52ef7a9d50aacbc3d3fbb0366852457d39718cdf..04f23b404bbc5a7bd4b5bcdfb7e833d79ba1234e 100644 (file)
@@ -69,6 +69,14 @@ report::
 --scale::
        scale/normalize counter values
 
+-d::
+--detailed::
+       print more detailed statistics, can be specified up to 3 times
+
+          -d:          detailed events, L1 and LLC data cache
+       -d -d:          more detailed events, dTLB and iTLB events
+    -d -d -d:          very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
        repeat command and print average + stddev (max: 100). 0 means forever.
@@ -139,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms)
 The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals.  Use with caution.
        example: 'perf stat -I 1000 -e cycles -a sleep 5'
 
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
 --per-socket::
 Aggregate counts per processor socket for system-wide mode measurements.  This
 is a useful mode to detect imbalance between sockets.  To enable this mode,
@@ -211,6 +223,29 @@ $ perf stat -- make -j
 
  Wall-clock time elapsed:   719.554352 msecs
 
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format.
+Commas in the output are not quoted with "".  To make the output easy to parse,
+it is recommended to use a different delimiter character, e.g. -x \;
+
+The fields are in this order:
+
+       - optional usec time stamp in fractions of second (with -I xxx)
+       - optional CPU, core, or socket identifier
+       - optional number of logical CPUs aggregated
+       - counter value
+       - unit of the counter value or empty
+       - event name
+       - run time of counter
+       - percentage of measurement time the counter was running
+       - optional variance if multiple values are collected with -r
+       - optional metric value
+       - optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
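+
+As an illustration, with a semicolon separator the output can be post-processed
+with standard text tools; assuming no -I and no aggregation, the counter value
+is the first field and the event name the third:
+
+       perf stat -x \; -e cycles,instructions -- sleep 1 2>&1 | awk -F';' '{print $3, $1}'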
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-list[1]
index b0e60e17db389d89efafa7025cd47fd44fe625c3..19f046f027cd81e42c5696ab3172539baaeb745d 100644 (file)
@@ -233,6 +233,9 @@ Default is to monitor all CPUS.
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+--hierarchy::
+       Enable hierarchy output.
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
index 767ea2436e1cd841a762ee8cdb039ba80a7120b3..1d8d5bc4cd2de6601f926696d5c172b30fa735d4 100644 (file)
@@ -5,7 +5,7 @@
        medium = green, lightgray
        normal = black, lightgray
        selected = lightgray, magenta
-       code = blue, lightgray
+       jump_arrows = blue, lightgray
        addr = magenta, lightgray
 
 [tui]
index e0ce9573b79bd26ddfe8cb4aee6eade061a09a12..5950b5a24efdf6024ca58a2da084d9cd43295c12 100644 (file)
@@ -27,3 +27,4 @@ Skip collecing build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy
index dcd9a70c7193b43b5791058b01a3773da6e0d870..32a64e6190288e5110cfa6de78138aceb955e7af 100644 (file)
@@ -68,6 +68,20 @@ all tags TAGS:
        $(print_msg)
        $(make)
 
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+  has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+  rest := $(filter-out clean,$(MAKECMDGOALS))
+  ifneq ($(rest),)
+$(rest): clean
+  endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
 #
 # The clean target is not really parallel, don't print the jobs info:
 #
@@ -75,10 +89,17 @@ clean:
        $(make)
 
 #
-# The build-test target is not really parallel, don't print the jobs info:
+# The build-test target is not really parallel, so don't print the jobs info.
+# It also uses only the tests/make targets that don't pollute the source
+# repository, i.e. those that use O= or build the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
 #
 build-test:
-       @$(MAKE) SHUF=1 -f tests/make --no-print-directory
+       @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
 
 #
 # All other targets get passed through:
index 5d34815c7ccb8e02ac6d4db0d82d4712af73f343..4a4fad4182f534f23550ca5ac1ed5d42000f9b08 100644 (file)
@@ -58,6 +58,9 @@ include config/utilities.mak
 #
 # Define NO_LIBBIONIC if you do not want bionic support
 #
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
 # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
 # for dwarf backtrace post unwind.
 #
@@ -136,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
+LD += $(EXTRA_LDFLAGS)
+
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 
 RM      = rm -f
@@ -165,7 +170,16 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
 endif
 endif
 
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
 ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
 include config/Makefile
 endif
 
@@ -618,7 +632,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean
        $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
        $(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
                $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
-               $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c
+               $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
        $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
        $(python-clean)
 
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644 (file)
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..18b13518d8d8701137d489a65f7dd752bdc7f43f 100644 (file)
@@ -1,3 +1,4 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+PERF_HAVE_JITDUMP := 1
index 7fbca175099ec917ad69b8025c8249ee6c52a6a4..56e05f126ad8793d25bb1a70ce3dac5f100d3b3a 100644 (file)
@@ -1,3 +1,6 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_JITDUMP := 1
index 7b8b0d1a1b626065e0b414f42a7a998723e0e57f..c8fe2074d2177f0709f0e1d54f63cd77f84f2233 100644 (file)
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644 (file)
index 0000000..0dd6b7f
--- /dev/null
@@ -0,0 +1,123 @@
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes : hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+       {0x4, "H_REMOVE"},                                      \
+       {0x8, "H_ENTER"},                                       \
+       {0xc, "H_READ"},                                        \
+       {0x10, "H_CLEAR_MOD"},                                  \
+       {0x14, "H_CLEAR_REF"},                                  \
+       {0x18, "H_PROTECT"},                                    \
+       {0x1c, "H_GET_TCE"},                                    \
+       {0x20, "H_PUT_TCE"},                                    \
+       {0x24, "H_SET_SPRG0"},                                  \
+       {0x28, "H_SET_DABR"},                                   \
+       {0x2c, "H_PAGE_INIT"},                                  \
+       {0x30, "H_SET_ASR"},                                    \
+       {0x34, "H_ASR_ON"},                                     \
+       {0x38, "H_ASR_OFF"},                                    \
+       {0x3c, "H_LOGICAL_CI_LOAD"},                            \
+       {0x40, "H_LOGICAL_CI_STORE"},                           \
+       {0x44, "H_LOGICAL_CACHE_LOAD"},                         \
+       {0x48, "H_LOGICAL_CACHE_STORE"},                        \
+       {0x4c, "H_LOGICAL_ICBI"},                               \
+       {0x50, "H_LOGICAL_DCBF"},                               \
+       {0x54, "H_GET_TERM_CHAR"},                              \
+       {0x58, "H_PUT_TERM_CHAR"},                              \
+       {0x5c, "H_REAL_TO_LOGICAL"},                            \
+       {0x60, "H_HYPERVISOR_DATA"},                            \
+       {0x64, "H_EOI"},                                        \
+       {0x68, "H_CPPR"},                                       \
+       {0x6c, "H_IPI"},                                        \
+       {0x70, "H_IPOLL"},                                      \
+       {0x74, "H_XIRR"},                                       \
+       {0x78, "H_MIGRATE_DMA"},                                \
+       {0x7c, "H_PERFMON"},                                    \
+       {0xdc, "H_REGISTER_VPA"},                               \
+       {0xe0, "H_CEDE"},                                       \
+       {0xe4, "H_CONFER"},                                     \
+       {0xe8, "H_PROD"},                                       \
+       {0xec, "H_GET_PPP"},                                    \
+       {0xf0, "H_SET_PPP"},                                    \
+       {0xf4, "H_PURR"},                                       \
+       {0xf8, "H_PIC"},                                        \
+       {0xfc, "H_REG_CRQ"},                                    \
+       {0x100, "H_FREE_CRQ"},                                  \
+       {0x104, "H_VIO_SIGNAL"},                                \
+       {0x108, "H_SEND_CRQ"},                                  \
+       {0x110, "H_COPY_RDMA"},                                 \
+       {0x114, "H_REGISTER_LOGICAL_LAN"},                      \
+       {0x118, "H_FREE_LOGICAL_LAN"},                          \
+       {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"},                    \
+       {0x120, "H_SEND_LOGICAL_LAN"},                          \
+       {0x124, "H_BULK_REMOVE"},                               \
+       {0x130, "H_MULTICAST_CTRL"},                            \
+       {0x134, "H_SET_XDABR"},                                 \
+       {0x138, "H_STUFF_TCE"},                                 \
+       {0x13c, "H_PUT_TCE_INDIRECT"},                          \
+       {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"},                    \
+       {0x150, "H_VTERM_PARTNER_INFO"},                        \
+       {0x154, "H_REGISTER_VTERM"},                            \
+       {0x158, "H_FREE_VTERM"},                                \
+       {0x15c, "H_RESET_EVENTS"},                              \
+       {0x160, "H_ALLOC_RESOURCE"},                            \
+       {0x164, "H_FREE_RESOURCE"},                             \
+       {0x168, "H_MODIFY_QP"},                                 \
+       {0x16c, "H_QUERY_QP"},                                  \
+       {0x170, "H_REREGISTER_PMR"},                            \
+       {0x174, "H_REGISTER_SMR"},                              \
+       {0x178, "H_QUERY_MR"},                                  \
+       {0x17c, "H_QUERY_MW"},                                  \
+       {0x180, "H_QUERY_HCA"},                                 \
+       {0x184, "H_QUERY_PORT"},                                \
+       {0x188, "H_MODIFY_PORT"},                               \
+       {0x18c, "H_DEFINE_AQP1"},                               \
+       {0x190, "H_GET_TRACE_BUFFER"},                          \
+       {0x194, "H_DEFINE_AQP0"},                               \
+       {0x198, "H_RESIZE_MR"},                                 \
+       {0x19c, "H_ATTACH_MCQP"},                               \
+       {0x1a0, "H_DETACH_MCQP"},                               \
+       {0x1a4, "H_CREATE_RPT"},                                \
+       {0x1a8, "H_REMOVE_RPT"},                                \
+       {0x1ac, "H_REGISTER_RPAGES"},                           \
+       {0x1b0, "H_DISABLE_AND_GETC"},                          \
+       {0x1b4, "H_ERROR_DATA"},                                \
+       {0x1b8, "H_GET_HCA_INFO"},                              \
+       {0x1bc, "H_GET_PERF_COUNT"},                            \
+       {0x1c0, "H_MANAGE_TRACE"},                              \
+       {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"},                   \
+       {0x1d8, "H_POLL_PENDING"},                              \
+       {0x1e4, "H_QUERY_INT_STATE"},                           \
+       {0x244, "H_ILLAN_ATTRIBUTES"},                          \
+       {0x250, "H_MODIFY_HEA_QP"},                             \
+       {0x254, "H_QUERY_HEA_QP"},                              \
+       {0x258, "H_QUERY_HEA"},                                 \
+       {0x25c, "H_QUERY_HEA_PORT"},                            \
+       {0x260, "H_MODIFY_HEA_PORT"},                           \
+       {0x264, "H_REG_BCMC"},                                  \
+       {0x268, "H_DEREG_BCMC"},                                \
+       {0x26c, "H_REGISTER_HEA_RPAGES"},                       \
+       {0x270, "H_DISABLE_AND_GET_HEA"},                       \
+       {0x274, "H_GET_HEA_INFO"},                              \
+       {0x278, "H_ALLOC_HEA_RESOURCE"},                        \
+       {0x284, "H_ADD_CONN"},                                  \
+       {0x288, "H_DEL_CONN"},                                  \
+       {0x298, "H_JOIN"},                                      \
+       {0x2a4, "H_VASI_STATE"},                                \
+       {0x2b0, "H_ENABLE_CRQ"},                                \
+       {0x2b8, "H_GET_EM_PARMS"},                              \
+       {0x2d0, "H_SET_MPP"},                                   \
+       {0x2d4, "H_GET_MPP"},                                   \
+       {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"},                   \
+       {0x2f4, "H_BEST_ENERGY"},                               \
+       {0x2fc, "H_XIRR_X"},                                    \
+       {0x300, "H_RANDOM"},                                    \
+       {0x304, "H_COP"},                                       \
+       {0x314, "H_GET_MPP_X"},                                 \
+       {0x31c, "H_SET_MODE"},                                  \
+       {0xf000, "H_RTAS"}                                      \
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644 (file)
index 0000000..e68ba2d
--- /dev/null
@@ -0,0 +1,33 @@
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors : exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+       {0x0,   "RETURN_TO_HOST"}, \
+       {0x100, "SYSTEM_RESET"}, \
+       {0x200, "MACHINE_CHECK"}, \
+       {0x300, "DATA_STORAGE"}, \
+       {0x380, "DATA_SEGMENT"}, \
+       {0x400, "INST_STORAGE"}, \
+       {0x480, "INST_SEGMENT"}, \
+       {0x500, "EXTERNAL"}, \
+       {0x501, "EXTERNAL_LEVEL"}, \
+       {0x502, "EXTERNAL_HV"}, \
+       {0x600, "ALIGNMENT"}, \
+       {0x700, "PROGRAM"}, \
+       {0x800, "FP_UNAVAIL"}, \
+       {0x900, "DECREMENTER"}, \
+       {0x980, "HV_DECREMENTER"}, \
+       {0xc00, "SYSCALL"}, \
+       {0xd00, "TRACE"}, \
+       {0xe00, "H_DATA_STORAGE"}, \
+       {0xe20, "H_INST_STORAGE"}, \
+       {0xe40, "H_EMUL_ASSIST"}, \
+       {0xf00, "PERFMON"}, \
+       {0xf20, "ALTIVEC"}, \
+       {0xf40, "VSX"}
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644 (file)
index 0000000..74eee30
--- /dev/null
@@ -0,0 +1,170 @@
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+       "kvm_hv:kvm_guest_enter",
+       "kvm_hv:kvm_guest_exit",
+       "kvm_hv:kvm_hcall_enter",
+       "kvm_hv:kvm_hcall_exit",
+       NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+                               struct perf_sample *sample,
+                               struct event_key *key)
+{
+       key->info = 0;
+       key->key = perf_evsel__intval(evsel, sample, "req");
+}
+
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+       struct exit_reasons_table *tbl = hcall_reasons;
+
+       while (tbl->reason != NULL) {
+               if (tbl->exit_code == exit_code)
+                       return tbl->reason;
+               tbl++;
+       }
+
+       pr_debug("Unknown hcall code: %lld\n",
+              (unsigned long long)exit_code);
+       return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+                           struct perf_sample *sample __maybe_unused,
+                           struct event_key *key __maybe_unused)
+{
+       return (!strcmp(evsel->name, kvm_events_tp[3]));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+                             struct perf_sample *sample, struct event_key *key)
+{
+       if (!strcmp(evsel->name, kvm_events_tp[2])) {
+               hcall_event_get_key(evsel, sample, key);
+               return true;
+       }
+
+       return false;
+}
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+                                  struct event_key *key,
+                                  char *decode)
+{
+       const char *hcall_reason = get_hcall_exit_reason(key->key);
+
+       scnprintf(decode, decode_str_len, "%s", hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+       .is_begin_event = hcall_event_begin,
+       .is_end_event = hcall_event_end,
+       .decode_key = hcall_event_decode_key,
+       .name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+       .is_begin_event = exit_event_begin,
+       .is_end_event = exit_event_end,
+       .decode_key = exit_event_decode_key,
+       .name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+       { .name = "vmexit", .ops = &exit_events },
+       { .name = "hcall", .ops = &hcall_events },
+       { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+       NULL,
+};
+
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+       struct parse_events_error err;
+       int ret;
+
+       err.str = NULL;
+       ret = parse_events(evlist, str, &err);
+       if (err.str)
+               pr_err("%s : %s\n", str, err.str);
+       return ret;
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+                               struct perf_evlist *evlist)
+{
+       const char **events_ptr;
+       int i, nr_tp = 0, err = -1;
+
+       /* Check for book3s_hv tracepoints */
+       for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) {
+               err = is_tracepoint_available(*events_ptr, evlist);
+               if (err)
+                       return -1;
+               nr_tp++;
+       }
+
+       for (i = 0; i < nr_tp; i++)
+               kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i];
+
+       kvm_events_tp[i] = NULL;
+       kvm_exit_reason = "trap";
+       kvm->exit_reasons = hv_exit_reasons;
+       kvm->exit_reasons_isa = "HV";
+
+       return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+       struct perf_evlist *evlist = perf_evlist__new();
+
+       if (evlist == NULL)
+               return -ENOMEM;
+
+       /* Right now, only supported on book3s_hv */
+       return ppc__setup_book3s_hv(kvm, evlist);
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+       return ppc__setup_kvm_tp(kvm);
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+       int ret;
+
+       ret = ppc__setup_kvm_tp(kvm);
+       if (ret) {
+               kvm->exit_reasons = NULL;
+               kvm->exit_reasons_isa = NULL;
+       }
+
+       return ret;
+}
index a5dbc07ec9dc70900ed4a593f5979d419061fd35..ed57df2e6d687d89ccf95d9aa03c263411de7fe9 100644 (file)
@@ -10,7 +10,7 @@
  */
 
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/sie.h>
 
 define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
 define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
@@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes);
 define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
 define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
 
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
 static void event_icpt_insn_get_key(struct perf_evsel *evsel,
                                    struct perf_sample *sample,
                                    struct event_key *key)
@@ -73,7 +79,7 @@ static struct kvm_events_ops exit_events = {
        .name = "VM-EXIT"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
        "kvm:kvm_s390_sie_enter",
        "kvm:kvm_s390_sie_exit",
        "kvm:kvm_s390_intercept_instruction",
index 09ba923debe86810f8380f7df54504dee4232ec8..269af21437353b2fb886383ede1e3f9a2f586f25 100644 (file)
@@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
index 7bb0d13c235f70326afc28d726f38f1d95499055..72193f19d6d75d3c32e19b424a0f1910ebbc24a6 100644 (file)
@@ -59,7 +59,7 @@ static u64 mmap_read_self(void *addr)
                u64 quot, rem;
 
                quot = (cyc >> time_shift);
-               rem = cyc & ((1 << time_shift) - 1);
+               rem = cyc & (((u64)1 << time_shift) - 1);
                delta = time_offset + quot * time_mult +
                        ((rem * time_mult) >> time_shift);
 
@@ -103,6 +103,7 @@ static int __test__rdpmc(void)
 
        sigfillset(&sa.sa_mask);
        sa.sa_sigaction = segfault_handler;
+       sa.sa_flags = 0;
        sigaction(SIGSEGV, &sa, NULL);
 
        fd = sys_perf_event_open(&attr, 0, -1, -1,
index 8d8150f1cf9bcf426b4c29a19170221db680a980..d66f9ad4df2ea5da6eca22cae351a842d1143508 100644 (file)
@@ -60,7 +60,9 @@ struct branch {
        u64 misc;
 };
 
-static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+                        struct perf_evlist *evlist __maybe_unused)
 {
        return INTEL_BTS_AUXTRACE_PRIV_SIZE;
 }
index f05daacc9e7810117cfe25415a75990b3aa89f63..a3395179c9eebd5fcd39aa4796a0dc3d495f4b5d 100644 (file)
@@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats,
 
        *config = attr.config;
 out_free:
-       parse_events__free_terms(terms);
+       parse_events_terms__delete(terms);
        return err;
 }
 
@@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
        return attr;
 }
 
-static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused)
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+                       struct perf_evlist *evlist __maybe_unused)
 {
        return INTEL_PT_AUXTRACE_PRIV_SIZE;
 }
index 14e4e668fad733c1802908047ae524318aa586de..b63d4be655a24a678f69d6deea3b4ee87283b938 100644 (file)
@@ -1,5 +1,7 @@
 #include "../../util/kvm-stat.h"
-#include <asm/kvm_perf.h>
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
 
 define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
 define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = {
        .name = "VM-EXIT"
 };
 
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
 /*
  * For the mmio events, we treat:
  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
@@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
                                  struct event_key *key,
                                  char *decode)
 {
-       scnprintf(decode, DECODE_STR_LEN, "%#lx:%s",
+       scnprintf(decode, decode_str_len, "%#lx:%s",
                  (unsigned long)key->key,
                  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
 }
@@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
                                    struct event_key *key,
                                    char *decode)
 {
-       scnprintf(decode, DECODE_STR_LEN, "%#llx:%s",
+       scnprintf(decode, decode_str_len, "%#llx:%s",
                  (unsigned long long)key->key,
                  key->info ? "POUT" : "PIN");
 }
@@ -121,7 +129,7 @@ static struct kvm_events_ops ioport_events = {
        .name = "IO Port Access"
 };
 
-const char * const kvm_events_tp[] = {
+const char *kvm_events_tp[] = {
        "kvm:kvm_entry",
        "kvm:kvm_exit",
        "kvm:kvm_mmio",
index e4c2c30143b95133913a2bc3569b2871b79aa75c..5c3cce082cb88c5cab984bdb1ae6944274861ad4 100644 (file)
@@ -1,6 +1,11 @@
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
+#define _ASM_EXTABLE_FAULT(x, y)
+
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
index cc5c1267c738da038cfb6b824f79854fab41a629..cfe366375c4b89bb3642f5531d04b418a8d76e56 100644 (file)
@@ -245,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
                        hists__collapse_resort(hists, NULL);
                        /* Don't sort callchain */
                        perf_evsel__reset_sample_bit(pos, CALLCHAIN);
-                       hists__output_resort(hists, NULL);
+                       perf_evsel__output_resort(pos, NULL);
 
                        if (symbol_conf.event_group &&
                            !perf_evsel__is_group_leader(pos))
index d93bff7fc0e407d6518a6ce0e2ee0dd4b65bf910..632efc6b79a07e20c4b25f9757bcd9c089050270 100644 (file)
@@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 
 static int build_id_cache__kcore_dir(char *dir, size_t sz)
 {
-       struct timeval tv;
-       struct tm tm;
-       char dt[32];
-
-       if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
-               return -1;
-
-       if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
-               return -1;
-
-       scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
-
-       return 0;
+       return fetch_current_timestamp(dir, sz);
 }
 
 static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
index f04e804a9fadc6807a7bc74810fac5ddaad912b2..c42448ed5dfe20a74af9c671669aa23a25cadb5b 100644 (file)
 #include "util/util.h"
 #include "util/debug.h"
 
+static bool use_system_config, use_user_config;
+
 static const char * const config_usage[] = {
-       "perf config [options]",
+       "perf config [<file-option>] [options]",
        NULL
 };
 
@@ -25,6 +27,8 @@ enum actions {
 static struct option config_options[] = {
        OPT_SET_UINT('l', "list", &actions,
                     "show current config variables", ACTION_LIST),
+       OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+       OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
        OPT_END()
 };
 
@@ -42,10 +46,23 @@ static int show_config(const char *key, const char *value,
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
        int ret = 0;
+       char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
        argc = parse_options(argc, argv, config_options, config_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
 
+       if (use_system_config && use_user_config) {
+               pr_err("Error: only one config file at a time\n");
+               parse_options_usage(config_usage, config_options, "user", 0);
+               parse_options_usage(NULL, config_options, "system", 0);
+               return -1;
+       }
+
+       if (use_system_config)
+               config_exclusive_filename = perf_etc_perfconfig();
+       else if (use_user_config)
+               config_exclusive_filename = user_config;
+
        switch (actions) {
        case ACTION_LIST:
                if (argc) {
@@ -53,9 +70,13 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
                        parse_options_usage(config_usage, config_options, "l", 1);
                } else {
                        ret = perf_config(show_config, NULL);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               const char * config_filename = config_exclusive_filename;
+                               if (!config_exclusive_filename)
+                                       config_filename = user_config;
                                pr_err("Nothing configured, "
-                                      "please check your ~/.perfconfig file\n");
+                                      "please check your %s \n", config_filename);
+                       }
                }
                break;
        default:
index 36ccc2b8827fdd239b657e79b396ae013c318462..4d72359fd15ad00ca7397bb033e84b4f899546c2 100644 (file)
@@ -1264,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
        if (ret < 0)
                return ret;
 
-       perf_config(perf_default_config, NULL);
-
        argc = parse_options(argc, argv, options, diff_usage, 0);
 
        if (symbol__init(NULL) < 0)
index 96c1a4cfbbbf6b1f639d91d562c55b09da5c7e6f..49d55e21b1b06dbecd9c6935a2e0f33550585514 100644 (file)
@@ -86,8 +86,7 @@ static int check_emacsclient_version(void)
                return -1;
        }
 
-       strbuf_remove(&buffer, 0, strlen("emacsclient"));
-       version = atoi(buffer.buf);
+       version = atoi(buffer.buf + strlen("emacsclient"));
 
        if (version < 22) {
                fprintf(stderr,
@@ -273,7 +272,7 @@ static int perf_help_config(const char *var, const char *value, void *cb)
        if (!prefixcmp(var, "man."))
                return add_man_viewer_info(var, value);
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static struct cmdnames main_cmds, other_cmds;
index 0022e02ed31a7034b9286884ab87c4c131e41fa3..7fa68663ed7287f63adad2342db987941ba0c1b4 100644 (file)
@@ -17,6 +17,7 @@
 #include "util/build-id.h"
 #include "util/data.h"
 #include "util/auxtrace.h"
+#include "util/jit.h"
 
 #include <subcmd/parse-options.h>
 
@@ -29,6 +30,7 @@ struct perf_inject {
        bool                    sched_stat;
        bool                    have_auxtrace;
        bool                    strip;
+       bool                    jit_mode;
        const char              *input_name;
        struct perf_data_file   output;
        u64                     bytes_written;
@@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool,
        return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+                              union perf_event *event __maybe_unused,
+                              struct ordered_events *oe __maybe_unused)
+{
+       return 0;
+}
+#endif
+
 static int perf_event__repipe_op2_synth(struct perf_tool *tool,
                                        union perf_event *event,
                                        struct perf_session *session
@@ -234,6 +245,31 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
        return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+                                      union perf_event *event,
+                                      struct perf_sample *sample,
+                                      struct machine *machine)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+       u64 n = 0;
+       int ret;
+
+       /*
+        * if jit marker, then inject jit mmaps and generate ELF images
+        */
+       ret = jit_process(inject->session, &inject->output, machine,
+                         event->mmap.filename, sample->pid, &n);
+       if (ret < 0)
+               return ret;
+       if (ret) {
+               inject->bytes_written += n;
+               return 0;
+       }
+       return perf_event__repipe_mmap(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_mmap2(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
@@ -247,6 +283,31 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool,
        return err;
 }
 
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+                                       union perf_event *event,
+                                       struct perf_sample *sample,
+                                       struct machine *machine)
+{
+       struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+       u64 n = 0;
+       int ret;
+
+       /*
+        * if jit marker, then inject jit mmaps and generate ELF images
+        */
+       ret = jit_process(inject->session, &inject->output, machine,
+                         event->mmap2.filename, sample->pid, &n);
+       if (ret < 0)
+               return ret;
+       if (ret) {
+               inject->bytes_written += n;
+               return 0;
+       }
+       return perf_event__repipe_mmap2(tool, event, sample, machine);
+}
+#endif
+
 static int perf_event__repipe_fork(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
@@ -626,12 +687,16 @@ static int __cmd_inject(struct perf_inject *inject)
        ret = perf_session__process_events(session);
 
        if (!file_out->is_pipe) {
-               if (inject->build_ids) {
+               if (inject->build_ids)
                        perf_header__set_feat(&session->header,
                                              HEADER_BUILD_ID);
-                       if (inject->have_auxtrace)
-                               dsos__hit_all(session);
-               }
+               /*
+                * Keep all buildids when there is unprocessed AUX data because
+                * it is not known which ones the AUX trace hits.
+                */
+               if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+                   inject->have_auxtrace && !inject->itrace_synth_opts.set)
+                       dsos__hit_all(session);
                /*
                 * The AUX areas have been removed and replaced with
                 * synthesized hardware events, so clear the feature flag and
@@ -703,7 +768,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
        };
        int ret;
 
-       const struct option options[] = {
+       struct option options[] = {
                OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
                            "Inject build-ids into the output stream"),
                OPT_STRING('i', "input", &inject.input_name, "file",
@@ -713,6 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
                            "Merge sched-stat and sched-switch for getting events "
                            "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+               OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
                OPT_INCR('v', "verbose", &verbose,
                         "be more verbose (show build ids, etc)"),
                OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
@@ -729,7 +797,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                "perf inject [<options>]",
                NULL
        };
-
+#ifndef HAVE_JITDUMP
+       set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
        argc = parse_options(argc, argv, options, inject_usage, 0);
 
        /*
@@ -755,6 +825,29 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
        if (inject.session == NULL)
                return -1;
 
+       if (inject.build_ids) {
+               /*
+                * Make sure the mmap records are ordered correctly so that
+                * build-ids are correct, especially for jitted code mmaps.
+                * We cannot generate the buildid hit list and inject the
+                * jit mmaps at the same time for now.
+                */
+               inject.tool.ordered_events = true;
+               inject.tool.ordering_requires_timestamps = true;
+       }
+#ifdef HAVE_JITDUMP
+       if (inject.jit_mode) {
+               inject.tool.mmap2          = perf_event__jit_repipe_mmap2;
+               inject.tool.mmap           = perf_event__jit_repipe_mmap;
+               inject.tool.ordered_events = true;
+               inject.tool.ordering_requires_timestamps = true;
+               /*
+                * JIT MMAP injection injects all MMAP events in one go, so it
+                * does not obey finished_round semantics.
+                */
+               inject.tool.finished_round = perf_event__drop_oe;
+       }
+#endif
        ret = symbol__init(&inject.session->header.env);
        if (ret < 0)
                goto out_delete;
index 118010553d0cf0dca9de6f493a5c8e267b0fff48..4d3340cce9a0200b469fd61a563eee4cb7f48830 100644 (file)
@@ -1834,7 +1834,7 @@ static int __cmd_record(int argc, const char **argv)
        return cmd_record(i, rec_argv, NULL);
 }
 
-static int kmem_config(const char *var, const char *value, void *cb)
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
 {
        if (!strcmp(var, "kmem.default")) {
                if (!strcmp(value, "slab"))
@@ -1847,7 +1847,7 @@ static int kmem_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
index 4418d9214872150648a719dd07fc14a29c913e9b..bff666458b28e24dccac682d0f28b6708a1a7c83 100644 (file)
@@ -30,7 +30,6 @@
 #include <math.h>
 
 #ifdef HAVE_KVM_STAT_SUPPORT
-#include <asm/kvm_perf.h>
 #include "util/kvm-stat.h"
 
 void exit_event_get_key(struct perf_evsel *evsel,
@@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel,
                        struct event_key *key)
 {
        key->info = 0;
-       key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON);
+       key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
 }
 
 bool kvm_exit_event(struct perf_evsel *evsel)
 {
-       return !strcmp(evsel->name, KVM_EXIT_TRACE);
+       return !strcmp(evsel->name, kvm_exit_trace);
 }
 
 bool exit_event_begin(struct perf_evsel *evsel,
@@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel,
 
 bool kvm_entry_event(struct perf_evsel *evsel)
 {
-       return !strcmp(evsel->name, KVM_ENTRY_TRACE);
+       return !strcmp(evsel->name, kvm_entry_trace);
 }
 
 bool exit_event_end(struct perf_evsel *evsel,
@@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
        const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
                                                  key->key);
 
-       scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason);
+       scnprintf(decode, decode_str_len, "%s", exit_reason);
 }
 
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
@@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm,
        time_diff = sample->time - time_begin;
 
        if (kvm->duration && time_diff > kvm->duration) {
-               char decode[DECODE_STR_LEN];
+               char decode[decode_str_len];
 
                kvm->events_ops->decode_key(kvm, &event->key, decode);
                if (!skip_event(decode)) {
@@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
                        return NULL;
                }
 
-               vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID);
+               vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
+                                                         vcpu_id_str);
                thread__set_priv(thread, vcpu_record);
        }
 
@@ -574,7 +574,7 @@ static void show_timeofday(void)
 
 static void print_result(struct perf_kvm_stat *kvm)
 {
-       char decode[DECODE_STR_LEN];
+       char decode[decode_str_len];
        struct kvm_event *event;
        int vcpu = kvm->trace_vcpu;
 
@@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 
        pr_info("\n\n");
        print_vcpu_info(kvm);
-       pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name);
+       pr_info("%*s ", decode_str_len, kvm->events_ops->name);
        pr_info("%10s ", "Samples");
        pr_info("%9s ", "Samples%");
 
@@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm)
                min = get_event_min(event, vcpu);
 
                kvm->events_ops->decode_key(kvm, &event->key, decode);
-               pr_info("%*s ", DECODE_STR_LEN, decode);
+               pr_info("%*s ", decode_str_len, decode);
                pr_info("%10llu ", (unsigned long long)ecount);
                pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
                pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
@@ -1132,6 +1132,11 @@ exit:
                _p;                     \
        })
 
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+       return 0;
+}
+
 static int
 kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
 {
@@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
                NULL
        };
        const char * const *events_tp;
+       int ret;
+
        events_tp_size = 0;
+       ret = setup_kvm_events_tp(kvm);
+       if (ret < 0) {
+               pr_err("Unable to setup the kvm tracepoints\n");
+               return ret;
+       }
 
        for (events_tp = kvm_events_tp; *events_tp; events_tp++)
                events_tp_size++;
@@ -1377,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
        /*
         * generate the event list
         */
+       err = setup_kvm_events_tp(kvm);
+       if (err < 0) {
+               pr_err("Unable to setup the kvm tracepoints\n");
+               return err;
+       }
+
        kvm->evlist = kvm_live_event_list();
        if (kvm->evlist == NULL) {
                err = -1;
index 39017004169665ec4a2ebd59aa0bc43e58412874..88aeac9aa1da12c664ec64ee419d6ce48c5ad8ec 100644 (file)
@@ -6,6 +6,8 @@
 #include "util/tool.h"
 #include "util/session.h"
 #include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
 
 #define MEM_OPERATION_LOAD     0x1
 #define MEM_OPERATION_STORE    0x2
@@ -21,11 +23,56 @@ struct perf_mem {
        DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+static int parse_record_events(const struct option *opt,
+                              const char *str, int unset __maybe_unused)
+{
+       struct perf_mem *mem = *(struct perf_mem **)opt->value;
+       int j;
+
+       if (strcmp(str, "list")) {
+               if (!perf_mem_events__parse(str)) {
+                       mem->operation = 0;
+                       return 0;
+               }
+               exit(-1);
+       }
+
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               struct perf_mem_event *e = &perf_mem_events[j];
+
+               fprintf(stderr, "%-13s%-*s%s\n",
+                       e->tag,
+                       verbose ? 25 : 0,
+                       verbose ? perf_mem_events__name(j) : "",
+                       e->supported ? ": available" : "");
+       }
+       exit(0);
+}
+
+static const char * const __usage[] = {
+       "perf mem record [<options>] [<command>]",
+       "perf mem record [<options>] -- <command> [<options>]",
+       NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
 static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
        int rec_argc, i = 0, j;
        const char **rec_argv;
        int ret;
+       struct option options[] = {
+       OPT_CALLBACK('e', "event", &mem, "event",
+                    "event selector. use 'perf mem record -e list' to list available events",
+                    parse_record_events),
+       OPT_INCR('v', "verbose", &verbose,
+                "be more verbose (show counter open errors, etc)"),
+       OPT_END()
+       };
+
+       argc = parse_options(argc, argv, options, record_mem_usage,
+                            PARSE_OPT_STOP_AT_NON_OPTION);
 
        rec_argc = argc + 7; /* max number of arguments */
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -35,23 +82,40 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        rec_argv[i++] = "record";
 
        if (mem->operation & MEM_OPERATION_LOAD)
+               perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+       if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
                rec_argv[i++] = "-W";
 
        rec_argv[i++] = "-d";
 
-       if (mem->operation & MEM_OPERATION_LOAD) {
-               rec_argv[i++] = "-e";
-               rec_argv[i++] = "cpu/mem-loads/pp";
-       }
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               if (!perf_mem_events[j].record)
+                       continue;
+
+               if (!perf_mem_events[j].supported) {
+                       pr_err("failed: event '%s' not supported\n",
+                              perf_mem_events__name(j));
+                       return -1;
+               }
 
-       if (mem->operation & MEM_OPERATION_STORE) {
                rec_argv[i++] = "-e";
-               rec_argv[i++] = "cpu/mem-stores/pp";
-       }
+               rec_argv[i++] = perf_mem_events__name(j);
+       };
 
-       for (j = 1; j < argc; j++, i++)
+       for (j = 0; j < argc; j++, i++)
                rec_argv[i] = argv[j];
 
+       if (verbose > 0) {
+               pr_debug("calling: record ");
+
+               while (rec_argv[j]) {
+                       pr_debug("%s ", rec_argv[j]);
+                       j++;
+               }
+               pr_debug("\n");
+       }
+
        ret = cmd_record(i, rec_argv, NULL);
        free(rec_argv);
        return ret;
@@ -298,6 +362,10 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
                NULL
        };
 
+       if (perf_mem_events__init()) {
+               pr_err("failed: memory events not supported\n");
+               return -1;
+       }
 
        argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
                                        mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
index 319712a4e02b73de7359fc2ed772bed1d9cd424f..515510ecc76a43391e2ac58f830557b51810b466 100644 (file)
@@ -32,6 +32,8 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "asm/bug.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -49,7 +51,9 @@ struct record {
        const char              *progname;
        int                     realtime_prio;
        bool                    no_buildid;
+       bool                    no_buildid_set;
        bool                    no_buildid_cache;
+       bool                    no_buildid_cache_set;
        bool                    buildid_all;
        unsigned long long      samples;
 };
@@ -320,7 +324,10 @@ try_again:
                } else {
                        pr_err("failed to mmap with %d (%s)\n", errno,
                                strerror_r(errno, msg, sizeof(msg)));
-                       rc = -errno;
+                       if (errno)
+                               rc = -errno;
+                       else
+                               rc = -EINVAL;
                }
                goto out;
        }
@@ -464,6 +471,29 @@ static void record__init_features(struct record *rec)
        perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
+static void
+record__finish_output(struct record *rec)
+{
+       struct perf_data_file *file = &rec->file;
+       int fd = perf_data_file__fd(file);
+
+       if (file->is_pipe)
+               return;
+
+       rec->session->header.data_size += rec->bytes_written;
+       file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
+
+       if (!rec->no_buildid) {
+               process_buildids(rec);
+
+               if (rec->buildid_all)
+                       dsos__hit_all(rec->session);
+       }
+       perf_session__write_header(rec->session, rec->evlist, fd, true);
+
+       return;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -482,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 
 static void snapshot_sig_handler(int sig);
 
+static int record__synthesize(struct record *rec)
+{
+       struct perf_session *session = rec->session;
+       struct machine *machine = &session->machines.host;
+       struct perf_data_file *file = &rec->file;
+       struct record_opts *opts = &rec->opts;
+       struct perf_tool *tool = &rec->tool;
+       int fd = perf_data_file__fd(file);
+       int err = 0;
+
+       if (file->is_pipe) {
+               err = perf_event__synthesize_attrs(tool, session,
+                                                  process_synthesized_event);
+               if (err < 0) {
+                       pr_err("Couldn't synthesize attrs.\n");
+                       goto out;
+               }
+
+               if (have_tracepoints(&rec->evlist->entries)) {
+                       /*
+                        * FIXME err <= 0 here actually means that
+                        * there were no tracepoints, so it's not really
+                        * an error, just that we don't need to
+                        * synthesize anything.  We really have to
+                        * return this more properly and also
+                        * propagate the errors that are now calling die()
+                        */
+                       err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
+                                                                 process_synthesized_event);
+                       if (err <= 0) {
+                               pr_err("Couldn't record tracing data.\n");
+                               goto out;
+                       }
+                       rec->bytes_written += err;
+               }
+       }
+
+       if (rec->opts.full_auxtrace) {
+               err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+                                       session, process_synthesized_event);
+               if (err)
+                       goto out;
+       }
+
+       err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+                                                machine);
+       WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+                          "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                          "Check /proc/kallsyms permission or run as root.\n");
+
+       err = perf_event__synthesize_modules(tool, process_synthesized_event,
+                                            machine);
+       WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+                          "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                          "Check /proc/modules permission or run as root.\n");
+
+       if (perf_guest) {
+               machines__process_guests(&session->machines,
+                                        perf_event__synthesize_guest_os, tool);
+       }
+
+       err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+                                           process_synthesized_event, opts->sample_address,
+                                           opts->proc_map_timeout);
+out:
+       return err;
+}
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
        int err;
@@ -534,6 +632,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                goto out_child;
        }
 
+       err = bpf__apply_obj_config();
+       if (err) {
+               char errbuf[BUFSIZ];
+
+               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+               pr_err("ERROR: Apply config to BPF failed: %s\n",
+                        errbuf);
+               goto out_child;
+       }
+
        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
@@ -566,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
        machine = &session->machines.host;
 
-       if (file->is_pipe) {
-               err = perf_event__synthesize_attrs(tool, session,
-                                                  process_synthesized_event);
-               if (err < 0) {
-                       pr_err("Couldn't synthesize attrs.\n");
-                       goto out_child;
-               }
-
-               if (have_tracepoints(&rec->evlist->entries)) {
-                       /*
-                        * FIXME err <= 0 here actually means that
-                        * there were no tracepoints so its not really
-                        * an error, just that we don't need to
-                        * synthesize anything.  We really have to
-                        * return this more properly and also
-                        * propagate errors that now are calling die()
-                        */
-                       err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
-                                                                 process_synthesized_event);
-                       if (err <= 0) {
-                               pr_err("Couldn't record tracing data.\n");
-                               goto out_child;
-                       }
-                       rec->bytes_written += err;
-               }
-       }
-
-       if (rec->opts.full_auxtrace) {
-               err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
-                                       session, process_synthesized_event);
-               if (err)
-                       goto out_delete_session;
-       }
-
-       err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-                                                machine);
-       if (err < 0)
-               pr_err("Couldn't record kernel reference relocation symbol\n"
-                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-                      "Check /proc/kallsyms permission or run as root.\n");
-
-       err = perf_event__synthesize_modules(tool, process_synthesized_event,
-                                            machine);
+       err = record__synthesize(rec);
        if (err < 0)
-               pr_err("Couldn't record kernel module information.\n"
-                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-                      "Check /proc/modules permission or run as root.\n");
-
-       if (perf_guest) {
-               machines__process_guests(&session->machines,
-                                        perf_event__synthesize_guest_os, tool);
-       }
-
-       err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-                                           process_synthesized_event, opts->sample_address,
-                                           opts->proc_map_timeout);
-       if (err != 0)
                goto out_child;
 
        if (rec->realtime_prio) {
@@ -758,18 +811,8 @@ out_child:
        /* this will be recalculated during process_buildids() */
        rec->samples = 0;
 
-       if (!err && !file->is_pipe) {
-               rec->session->header.data_size += rec->bytes_written;
-               file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-
-               if (!rec->no_buildid) {
-                       process_buildids(rec);
-
-                       if (rec->buildid_all)
-                               dsos__hit_all(rec->session);
-               }
-               perf_session__write_header(rec->session, rec->evlist, fd, true);
-       }
+       if (!err)
+               record__finish_output(rec);
 
        if (!err && !quiet) {
                char samples[128];
@@ -1097,10 +1140,12 @@ struct option __record_options[] = {
        OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
        OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                    "don't sample"),
-       OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
-                   "do not update the buildid cache"),
-       OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
-                   "do not collect buildids in perf.data"),
+       OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+                       &record.no_buildid_cache_set,
+                       "do not update the buildid cache"),
+       OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+                       &record.no_buildid_set,
+                       "do not collect buildids in perf.data"),
        OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                     "monitor event in cgroup name only",
                     parse_cgroups),
@@ -1136,6 +1181,12 @@ struct option __record_options[] = {
                        "per thread proc mmap processing timeout in ms"),
        OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
                    "Record context switch events"),
+       OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+                        "Configure all used events to run in kernel space.",
+                        PARSE_OPT_EXCLUSIVE),
+       OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+                        "Configure all used events to run in user space.",
+                        PARSE_OPT_EXCLUSIVE),
        OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
                   "clang binary to use for compiling BPF scriptlets"),
        OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
index 2bf537f190a026952bd63126e9c37def827e9914..7eea49f9ed46b520bdce56972379b76c14302c4b 100644 (file)
@@ -75,7 +75,10 @@ static int report__config(const char *var, const char *value, void *cb)
                return 0;
        }
        if (!strcmp(var, "report.percent-limit")) {
-               rep->min_percent = strtof(value, NULL);
+               double pcnt = strtof(value, NULL);
+
+               rep->min_percent = pcnt;
+               callchain_param.min_percent = pcnt;
                return 0;
        }
        if (!strcmp(var, "report.children")) {
@@ -87,7 +90,7 @@ static int report__config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int hist_iter__report_callback(struct hist_entry_iter *iter,
@@ -466,10 +469,11 @@ static int report__browse_hists(struct report *rep)
        return ret;
 }
 
-static void report__collapse_hists(struct report *rep)
+static int report__collapse_hists(struct report *rep)
 {
        struct ui_progress prog;
        struct perf_evsel *pos;
+       int ret = 0;
 
        ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
 
@@ -481,7 +485,9 @@ static void report__collapse_hists(struct report *rep)
 
                hists->socket_filter = rep->socket_filter;
 
-               hists__collapse_resort(hists, &prog);
+               ret = hists__collapse_resort(hists, &prog);
+               if (ret < 0)
+                       break;
 
                /* Non-group events are considered as leader */
                if (symbol_conf.event_group &&
@@ -494,6 +500,7 @@ static void report__collapse_hists(struct report *rep)
        }
 
        ui_progress__finish();
+       return ret;
 }
 
 static void report__output_resort(struct report *rep)
@@ -504,7 +511,7 @@ static void report__output_resort(struct report *rep)
        ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
 
        evlist__for_each(rep->session->evlist, pos)
-               hists__output_resort(evsel__hists(pos), &prog);
+               perf_evsel__output_resort(pos, &prog);
 
        ui_progress__finish();
 }
@@ -561,7 +568,11 @@ static int __cmd_report(struct report *rep)
                }
        }
 
-       report__collapse_hists(rep);
+       ret = report__collapse_hists(rep);
+       if (ret) {
+               ui__error("failed to process hist entry\n");
+               return ret;
+       }
 
        if (session_done())
                return 0;
@@ -633,8 +644,10 @@ parse_percent_limit(const struct option *opt, const char *str,
                    int unset __maybe_unused)
 {
        struct report *rep = opt->value;
+       double pcnt = strtof(str, NULL);
 
-       rep->min_percent = strtof(str, NULL);
+       rep->min_percent = pcnt;
+       callchain_param.min_percent = pcnt;
        return 0;
 }
 
@@ -798,6 +811,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                    "only show processor socket that match with this filter"),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
+       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+                   "Show entries in a hierarchy"),
        OPT_END()
        };
        struct perf_data_file file = {
@@ -907,13 +922,19 @@ repeat:
                symbol_conf.cumulate_callchain = false;
        }
 
-       if (setup_sorting(session->evlist) < 0) {
-               if (sort_order)
-                       parse_options_usage(report_usage, options, "s", 1);
-               if (field_order)
-                       parse_options_usage(sort_order ? NULL : report_usage,
-                                           options, "F", 1);
-               goto error;
+       if (symbol_conf.report_hierarchy) {
+               /* disable incompatible options */
+               symbol_conf.event_group = false;
+               symbol_conf.cumulate_callchain = false;
+
+               if (field_order) {
+                       pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+                       parse_options_usage(report_usage, options, "F", 1);
+                       parse_options_usage(NULL, options, "hierarchy", 0);
+                       goto error;
+               }
+
+               sort__need_collapse = true;
        }
 
        /* Force tty output for header output and per-thread stat. */
@@ -925,6 +946,15 @@ repeat:
        else
                use_browser = 0;
 
+       if (setup_sorting(session->evlist) < 0) {
+               if (sort_order)
+                       parse_options_usage(report_usage, options, "s", 1);
+               if (field_order)
+                       parse_options_usage(sort_order ? NULL : report_usage,
+                                           options, "F", 1);
+               goto error;
+       }
+
        if (report.header || report.header_only) {
                perf_session__fprintf_info(session, stdout,
                                           report.show_full_info);
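report__collapse_hists() above goes from void to int so that a failure inside hists__collapse_resort() stops the per-event loop and is reported once by __cmd_report(). The sketch below shows the same propagation shape with simplified stand-in types, not perf's real hists structures.

/*
 * Sketch of the error-propagation change made to report__collapse_hists():
 * a per-event loop that used to return void now stops at the first failure
 * and hands the error back to the caller.
 */
#include <stdio.h>

struct evsel { const char *name; };

/* Pretend per-event work that can fail (e.g. out of memory). */
static int collapse_one(const struct evsel *e)
{
        return e->name[0] == 'x' ? -1 : 0;
}

static int collapse_all(const struct evsel *evsels, int nr)
{
        int ret = 0;

        for (int i = 0; i < nr; i++) {
                ret = collapse_one(&evsels[i]);
                if (ret < 0)
                        break;          /* stop at the first failure */
        }
        return ret;
}

int main(void)
{
        struct evsel evsels[] = { { "cycles" }, { "xbad" }, { "instructions" } };

        if (collapse_all(evsels, 3) < 0) {
                fprintf(stderr, "failed to process hist entry\n");
                return 1;
        }
        return 0;
}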
index c691214d820f0050c39e9d376d69cd5891132a47..57f9a7e7f7d3e948a29a92018140902c7f5c840f 100644 (file)
@@ -23,6 +23,7 @@
 #include "util/stat.h"
 #include <linux/bitmap.h>
 #include "asm/bug.h"
+#include "util/mem-events.h"
 
 static char const              *script_name;
 static char const              *generate_script_lang;
@@ -58,6 +59,9 @@ enum perf_output_field {
        PERF_OUTPUT_IREGS           = 1U << 14,
        PERF_OUTPUT_BRSTACK         = 1U << 15,
        PERF_OUTPUT_BRSTACKSYM      = 1U << 16,
+       PERF_OUTPUT_DATA_SRC        = 1U << 17,
+       PERF_OUTPUT_WEIGHT          = 1U << 18,
+       PERF_OUTPUT_BPF_OUTPUT      = 1U << 19,
 };
 
 struct output_option {
@@ -81,6 +85,9 @@ struct output_option {
        {.str = "iregs", .field = PERF_OUTPUT_IREGS},
        {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
        {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+       {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+       {.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
+       {.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
 };
 
 /* default set to maintain compatibility with current format */
@@ -101,7 +108,7 @@ static struct {
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
                              PERF_OUTPUT_PERIOD,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 
        [PERF_TYPE_SOFTWARE] = {
@@ -111,7 +118,7 @@ static struct {
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
                              PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-                             PERF_OUTPUT_PERIOD,
+                             PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
 
                .invalid_fields = PERF_OUTPUT_TRACE,
        },
@@ -121,7 +128,7 @@ static struct {
 
                .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
                                  PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
-                                 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE,
+                                 PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
        },
 
        [PERF_TYPE_RAW] = {
@@ -131,9 +138,10 @@ static struct {
                              PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
                              PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-                             PERF_OUTPUT_PERIOD,
+                             PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
+                             PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 
        [PERF_TYPE_BREAKPOINT] = {
@@ -145,7 +153,7 @@ static struct {
                              PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
                              PERF_OUTPUT_PERIOD,
 
-               .invalid_fields = PERF_OUTPUT_TRACE,
+               .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
 };
 
@@ -242,6 +250,16 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
                                           PERF_OUTPUT_ADDR, allow_user_set))
                return -EINVAL;
 
+       if (PRINT_FIELD(DATA_SRC) &&
+               perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+                                       PERF_OUTPUT_DATA_SRC))
+               return -EINVAL;
+
+       if (PRINT_FIELD(WEIGHT) &&
+               perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+                                       PERF_OUTPUT_WEIGHT))
+               return -EINVAL;
+
        if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
                pr_err("Display of symbols requested but neither sample IP nor "
                           "sample address\nis selected. Hence, no addresses to convert "
@@ -608,6 +626,84 @@ static void print_sample_flags(u32 flags)
        printf("  %-4s ", str);
 }
 
+struct printer_data {
+       int line_no;
+       bool hit_nul;
+       bool is_printable;
+};
+
+static void
+print_sample_bpf_output_printer(enum binary_printer_ops op,
+                               unsigned int val,
+                               void *extra)
+{
+       unsigned char ch = (unsigned char)val;
+       struct printer_data *printer_data = extra;
+
+       switch (op) {
+       case BINARY_PRINT_DATA_BEGIN:
+               printf("\n");
+               break;
+       case BINARY_PRINT_LINE_BEGIN:
+               printf("%17s", !printer_data->line_no ? "BPF output:" :
+                                                       "           ");
+               break;
+       case BINARY_PRINT_ADDR:
+               printf(" %04x:", val);
+               break;
+       case BINARY_PRINT_NUM_DATA:
+               printf(" %02x", val);
+               break;
+       case BINARY_PRINT_NUM_PAD:
+               printf("   ");
+               break;
+       case BINARY_PRINT_SEP:
+               printf("  ");
+               break;
+       case BINARY_PRINT_CHAR_DATA:
+               if (printer_data->hit_nul && ch)
+                       printer_data->is_printable = false;
+
+               if (!isprint(ch)) {
+                       printf("%c", '.');
+
+                       if (!printer_data->is_printable)
+                               break;
+
+                       if (ch == '\0')
+                               printer_data->hit_nul = true;
+                       else
+                               printer_data->is_printable = false;
+               } else {
+                       printf("%c", ch);
+               }
+               break;
+       case BINARY_PRINT_CHAR_PAD:
+               printf(" ");
+               break;
+       case BINARY_PRINT_LINE_END:
+               printf("\n");
+               printer_data->line_no++;
+               break;
+       case BINARY_PRINT_DATA_END:
+       default:
+               break;
+       }
+}
+
+static void print_sample_bpf_output(struct perf_sample *sample)
+{
+       unsigned int nr_bytes = sample->raw_size;
+       struct printer_data printer_data = {0, false, true};
+
+       print_binary(sample->raw_data, nr_bytes, 8,
+                    print_sample_bpf_output_printer, &printer_data);
+
+       if (printer_data.is_printable && printer_data.hit_nul)
+               printf("%17s \"%s\"\n", "BPF string:",
+                      (char *)(sample->raw_data));
+}
+
 struct perf_script {
        struct perf_tool        tool;
        struct perf_session     *session;
@@ -634,6 +730,23 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
        return max;
 }
 
+static size_t data_src__printf(u64 data_src)
+{
+       struct mem_info mi = { .data_src.val = data_src };
+       char decode[100];
+       char out[100];
+       static int maxlen;
+       int len;
+
+       perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+       len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
+       if (maxlen < len)
+               maxlen = len;
+
+       return printf("%-*s", maxlen, out);
+}
+
 static void process_event(struct perf_script *script, union perf_event *event,
                          struct perf_sample *sample, struct perf_evsel *evsel,
                          struct addr_location *al)
@@ -673,6 +786,12 @@ static void process_event(struct perf_script *script, union perf_event *event,
        if (PRINT_FIELD(ADDR))
                print_sample_addr(event, sample, thread, attr);
 
+       if (PRINT_FIELD(DATA_SRC))
+               data_src__printf(sample->data_src);
+
+       if (PRINT_FIELD(WEIGHT))
+               printf("%16" PRIu64, sample->weight);
+
        if (PRINT_FIELD(IP)) {
                if (!symbol_conf.use_callchain)
                        printf(" ");
@@ -692,6 +811,9 @@ static void process_event(struct perf_script *script, union perf_event *event,
        else if (PRINT_FIELD(BRSTACKSYM))
                print_sample_brstacksym(event, sample, thread, attr);
 
+       if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+               print_sample_bpf_output(sample);
+
        printf("\n");
 }
 
@@ -1090,23 +1212,6 @@ static struct script_spec *script_spec__find(const char *spec)
        return NULL;
 }
 
-static struct script_spec *script_spec__findnew(const char *spec,
-                                               struct scripting_ops *ops)
-{
-       struct script_spec *s = script_spec__find(spec);
-
-       if (s)
-               return s;
-
-       s = script_spec__new(spec, ops);
-       if (!s)
-               return NULL;
-
-       script_spec__add(s);
-
-       return s;
-}
-
 int script_spec_register(const char *spec, struct scripting_ops *ops)
 {
        struct script_spec *s;
@@ -1115,9 +1220,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops)
        if (s)
                return -1;
 
-       s = script_spec__findnew(spec, ops);
+       s = script_spec__new(spec, ops);
        if (!s)
                return -1;
+       else
+               script_spec__add(s);
 
        return 0;
 }
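print_sample_bpf_output() above drives print_binary() with a callback that receives BINARY_PRINT_* operations plus a caller-owned context, letting one generic dumper serve hex, ASCII and layout duties. The sketch below reimplements a cut-down version of that protocol so it compiles on its own; the enum values and the dump loop are illustrative, not the exact upstream interface.

/*
 * Self-contained sketch of the callback-driven binary dump pattern.
 * 'enum binop', dump() and my_printer() are simplified stand-ins for
 * perf's print_binary() machinery.
 */
#include <ctype.h>
#include <stdio.h>

enum binop { PRINT_NUM_DATA, PRINT_CHAR_DATA, PRINT_LINE_END };

typedef void (*printer_t)(enum binop op, unsigned int val, void *extra);

static void dump(const unsigned char *data, size_t len, printer_t p, void *extra)
{
        for (size_t i = 0; i < len; i++)
                p(PRINT_NUM_DATA, data[i], extra);     /* hex pass   */
        for (size_t i = 0; i < len; i++)
                p(PRINT_CHAR_DATA, data[i], extra);    /* ASCII pass */
        p(PRINT_LINE_END, 0, extra);
}

struct printer_state { int printed; };

static void my_printer(enum binop op, unsigned int val, void *extra)
{
        struct printer_state *st = extra;

        switch (op) {
        case PRINT_NUM_DATA:
                printf(" %02x", val);
                st->printed++;
                break;
        case PRINT_CHAR_DATA:
                putchar(isprint(val) ? (int)val : '.');
                break;
        case PRINT_LINE_END:
                putchar('\n');
                break;
        }
}

int main(void)
{
        const unsigned char raw[] = "BPF\0out";
        struct printer_state st = { 0 };

        dump(raw, sizeof(raw) - 1, my_printer, &st);
        return 0;
}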
index 038e877081b682dd8d9ba052a0be66c8ac2786c6..1f19f2f999c841b9da140e10bcaf5e6e0f41ee6b 100644 (file)
@@ -122,6 +122,7 @@ static bool                 sync_run                        = false;
 static unsigned int            initial_delay                   = 0;
 static unsigned int            unit_width                      = 4; /* strlen("unit") */
 static bool                    forever                         = false;
+static bool                    metric_only                     = false;
 static struct timespec         ref_time;
 static struct cpu_map          *aggr_map;
 static aggr_get_id_t           aggr_get_id;
@@ -735,6 +736,191 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
        }
 }
 
+struct outstate {
+       FILE *fh;
+       bool newline;
+       const char *prefix;
+       int  nfields;
+       int  id, nr;
+       struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN  35
+
+static void new_line_std(void *ctx)
+{
+       struct outstate *os = ctx;
+
+       os->newline = true;
+}
+
+static void do_new_line_std(struct outstate *os)
+{
+       fputc('\n', os->fh);
+       fputs(os->prefix, os->fh);
+       aggr_printout(os->evsel, os->id, os->nr);
+       if (stat_config.aggr_mode == AGGR_NONE)
+               fprintf(os->fh, "        ");
+       fprintf(os->fh, "                                                 ");
+}
+
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+                            const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       int n;
+       bool newline = os->newline;
+
+       os->newline = false;
+
+       if (unit == NULL || fmt == NULL) {
+               fprintf(out, "%-*s", METRIC_LEN, "");
+               return;
+       }
+
+       if (newline)
+               do_new_line_std(os);
+
+       n = fprintf(out, " # ");
+       if (color)
+               n += color_fprintf(out, color, fmt, val);
+       else
+               n += fprintf(out, fmt, val);
+       fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(void *ctx)
+{
+       struct outstate *os = ctx;
+       int i;
+
+       fputc('\n', os->fh);
+       if (os->prefix)
+               fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+       aggr_printout(os->evsel, os->id, os->nr);
+       for (i = 0; i < os->nfields; i++)
+               fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+                            const char *color __maybe_unused,
+                            const char *fmt, const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       char buf[64], *vals, *ends;
+
+       if (unit == NULL || fmt == NULL) {
+               fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep);
+               return;
+       }
+       snprintf(buf, sizeof(buf), fmt, val);
+       vals = buf;
+       while (isspace(*vals))
+               vals++;
+       ends = vals;
+       while (isdigit(*ends) || *ends == '.')
+               ends++;
+       *ends = 0;
+       while (isspace(*unit))
+               unit++;
+       fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
+#define METRIC_ONLY_LEN 20
+
+/* Filter out some columns that don't work well in metrics only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+       if (!unit)
+               return false;
+       if (strstr(unit, "/sec") ||
+           strstr(unit, "hz") ||
+           strstr(unit, "Hz") ||
+           strstr(unit, "CPUs utilized"))
+               return false;
+       return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+                          const char *unit)
+{
+       if (!strncmp(unit, "of all", 6)) {
+               snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+                        unit);
+               return buf;
+       }
+       return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+                             const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       int n;
+       char buf[1024];
+       unsigned mlen = METRIC_ONLY_LEN;
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(buf, os->evsel, unit);
+       if (color)
+               n = color_fprintf(out, color, fmt, val);
+       else
+               n = fprintf(out, fmt, val);
+       if (n > METRIC_ONLY_LEN)
+               n = METRIC_ONLY_LEN;
+       if (mlen < strlen(unit))
+               mlen = strlen(unit) + 1;
+       fprintf(out, "%*s", mlen - n, "");
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+                                 const char *fmt,
+                                 const char *unit, double val)
+{
+       struct outstate *os = ctx;
+       FILE *out = os->fh;
+       char buf[64], *vals, *ends;
+       char tbuf[1024];
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(tbuf, os->evsel, unit);
+       snprintf(buf, sizeof buf, fmt, val);
+       vals = buf;
+       while (isspace(*vals))
+               vals++;
+       ends = vals;
+       while (isdigit(*ends) || *ends == '.')
+               ends++;
+       *ends = 0;
+       fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+                               const char *fmt __maybe_unused,
+                               const char *unit, double val __maybe_unused)
+{
+       struct outstate *os = ctx;
+       char tbuf[1024];
+
+       if (!valid_only_metric(unit))
+               return;
+       unit = fixunit(tbuf, os->evsel, unit);
+       if (csv_output)
+               fprintf(os->fh, "%s%s", unit, csv_sep);
+       else
+               fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+}
+
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
        FILE *output = stat_config.output;
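print_metric_csv() above turns a formatted metric into separate value and unit columns by trimming the leading padding and cutting the string where the digits (and decimal point) end. The following standalone illustration reimplements that split rather than copying perf's code; the function name is hypothetical.

/*
 * Sketch of the number/unit split used for CSV metric output.
 */
#include <ctype.h>
#include <stdio.h>

static void emit_csv_metric(const char *fmt, double val, const char *unit,
                            const char *sep)
{
        char buf[64], *vals, *end;

        snprintf(buf, sizeof(buf), fmt, val);

        vals = buf;
        while (isspace((unsigned char)*vals))   /* drop leading padding */
                vals++;

        end = vals;
        while (isdigit((unsigned char)*end) || *end == '.')
                end++;
        *end = '\0';                            /* keep only the number */

        while (isspace((unsigned char)*unit))
                unit++;

        printf("%s%s%s%s\n", sep, vals, sep, unit);
}

int main(void)
{
        /* e.g. "   0.89 " plus " insn per cycle" becomes ",0.89,insn per cycle" */
        emit_csv_metric("%7.2f ", 0.89, " insn per cycle", ",");
        return 0;
}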
@@ -763,6 +949,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+       int i;
+
+       if (!aggr_get_id)
+               return 0;
+
+       if (stat_config.aggr_mode == AGGR_NONE)
+               return id;
+
+       if (stat_config.aggr_mode == AGGR_GLOBAL)
+               return 0;
+
+       for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+               int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+               if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+                       return cpu2;
+       }
+       return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
        FILE *output = stat_config.output;
@@ -793,22 +1001,124 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
-static void printout(int id, int nr, struct perf_evsel *counter, double uval)
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+                    char *prefix, u64 run, u64 ena, double noise)
 {
-       int cpu = cpu_map__id_to_cpu(id);
+       struct perf_stat_output_ctx out;
+       struct outstate os = {
+               .fh = stat_config.output,
+               .prefix = prefix ? prefix : "",
+               .id = id,
+               .nr = nr,
+               .evsel = counter,
+       };
+       print_metric_t pm = print_metric_std;
+       void (*nl)(void *);
 
-       if (stat_config.aggr_mode == AGGR_GLOBAL)
-               cpu = 0;
+       if (metric_only) {
+               nl = new_line_metric;
+               if (csv_output)
+                       pm = print_metric_only_csv;
+               else
+                       pm = print_metric_only;
+       } else
+               nl = new_line_std;
+
+       if (csv_output && !metric_only) {
+               static int aggr_fields[] = {
+                       [AGGR_GLOBAL] = 0,
+                       [AGGR_THREAD] = 1,
+                       [AGGR_NONE] = 1,
+                       [AGGR_SOCKET] = 2,
+                       [AGGR_CORE] = 2,
+               };
+
+               pm = print_metric_csv;
+               nl = new_line_csv;
+               os.nfields = 3;
+               os.nfields += aggr_fields[stat_config.aggr_mode];
+               if (counter->cgrp)
+                       os.nfields++;
+       }
+       if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+               if (metric_only) {
+                       pm(&os, NULL, "", "", 0);
+                       return;
+               }
+               aggr_printout(counter, id, nr);
+
+               fprintf(stat_config.output, "%*s%s",
+                       csv_output ? 0 : 18,
+                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+                       csv_sep);
+
+               fprintf(stat_config.output, "%-*s%s",
+                       csv_output ? 0 : unit_width,
+                       counter->unit, csv_sep);
+
+               fprintf(stat_config.output, "%*s",
+                       csv_output ? 0 : -25,
+                       perf_evsel__name(counter));
+
+               if (counter->cgrp)
+                       fprintf(stat_config.output, "%s%s",
+                               csv_sep, counter->cgrp->name);
 
-       if (nsec_counter(counter))
+               if (!csv_output)
+                       pm(&os, NULL, NULL, "", 0);
+               print_noise(counter, noise);
+               print_running(run, ena);
+               if (csv_output)
+                       pm(&os, NULL, NULL, "", 0);
+               return;
+       }
+
+       if (metric_only)
+               /* nothing */;
+       else if (nsec_counter(counter))
                nsec_printout(id, nr, counter, uval);
        else
                abs_printout(id, nr, counter, uval);
 
-       if (!csv_output && !stat_config.interval)
-               perf_stat__print_shadow_stats(stat_config.output, counter,
-                                             uval, cpu,
-                                             stat_config.aggr_mode);
+       out.print_metric = pm;
+       out.new_line = nl;
+       out.ctx = &os;
+
+       if (csv_output && !metric_only) {
+               print_noise(counter, noise);
+               print_running(run, ena);
+       }
+
+       perf_stat__print_shadow_stats(counter, uval,
+                               first_shadow_cpu(counter, id),
+                               &out);
+       if (!csv_output && !metric_only) {
+               print_noise(counter, noise);
+               print_running(run, ena);
+       }
+}
+
+static void aggr_update_shadow(void)
+{
+       int cpu, s2, id, s;
+       u64 val;
+       struct perf_evsel *counter;
+
+       for (s = 0; s < aggr_map->nr; s++) {
+               id = aggr_map->map[s];
+               evlist__for_each(evsel_list, counter) {
+                       val = 0;
+                       for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+                               s2 = aggr_get_id(evsel_list->cpus, cpu);
+                               if (s2 != id)
+                                       continue;
+                               val += perf_counts(counter->counts, cpu, 0)->val;
+                       }
+                       val = val * counter->scale;
+                       perf_stat__update_shadow_stats(counter, &val,
+                                                      first_shadow_cpu(counter, id));
+               }
+       }
 }
 
 static void print_aggr(char *prefix)
@@ -818,12 +1128,23 @@ static void print_aggr(char *prefix)
        int cpu, s, s2, id, nr;
        double uval;
        u64 ena, run, val;
+       bool first;
 
        if (!(aggr_map || aggr_get_id))
                return;
 
+       aggr_update_shadow();
+
+       /*
+        * With metric_only everything is on a single line.
+        * Without it, each counter has its own line.
+        */
        for (s = 0; s < aggr_map->nr; s++) {
+               if (prefix && metric_only)
+                       fprintf(output, "%s", prefix);
+
                id = aggr_map->map[s];
+               first = true;
                evlist__for_each(evsel_list, counter) {
                        val = ena = run = 0;
                        nr = 0;
@@ -836,41 +1157,20 @@ static void print_aggr(char *prefix)
                                run += perf_counts(counter->counts, cpu, 0)->run;
                                nr++;
                        }
-                       if (prefix)
-                               fprintf(output, "%s", prefix);
-
-                       if (run == 0 || ena == 0) {
+                       if (first && metric_only) {
+                               first = false;
                                aggr_printout(counter, id, nr);
-
-                               fprintf(output, "%*s%s",
-                                       csv_output ? 0 : 18,
-                                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                                       csv_sep);
-
-                               fprintf(output, "%-*s%s",
-                                       csv_output ? 0 : unit_width,
-                                       counter->unit, csv_sep);
-
-                               fprintf(output, "%*s",
-                                       csv_output ? 0 : -25,
-                                       perf_evsel__name(counter));
-
-                               if (counter->cgrp)
-                                       fprintf(output, "%s%s",
-                                               csv_sep, counter->cgrp->name);
-
-                               print_running(run, ena);
-                               fputc('\n', output);
-                               continue;
                        }
-                       uval = val * counter->scale;
-                       printout(id, nr, counter, uval);
-                       if (!csv_output)
-                               print_noise(counter, 1.0);
+                       if (prefix && !metric_only)
+                               fprintf(output, "%s", prefix);
 
-                       print_running(run, ena);
-                       fputc('\n', output);
+                       uval = val * counter->scale;
+                       printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+                       if (!metric_only)
+                               fputc('\n', output);
                }
+               if (metric_only)
+                       fputc('\n', output);
        }
 }
 
@@ -895,12 +1195,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
                        fprintf(output, "%s", prefix);
 
                uval = val * counter->scale;
-               printout(thread, 0, counter, uval);
-
-               if (!csv_output)
-                       print_noise(counter, 1.0);
-
-               print_running(run, ena);
+               printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
                fputc('\n', output);
        }
 }
@@ -914,43 +1209,19 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
        FILE *output = stat_config.output;
        struct perf_stat_evsel *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
-       int scaled = counter->counts->scaled;
        double uval;
        double avg_enabled, avg_running;
 
        avg_enabled = avg_stats(&ps->res_stats[1]);
        avg_running = avg_stats(&ps->res_stats[2]);
 
-       if (prefix)
+       if (prefix && !metric_only)
                fprintf(output, "%s", prefix);
 
-       if (scaled == -1 || !counter->supported) {
-               fprintf(output, "%*s%s",
-                       csv_output ? 0 : 18,
-                       counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                       csv_sep);
-               fprintf(output, "%-*s%s",
-                       csv_output ? 0 : unit_width,
-                       counter->unit, csv_sep);
-               fprintf(output, "%*s",
-                       csv_output ? 0 : -25,
-                       perf_evsel__name(counter));
-
-               if (counter->cgrp)
-                       fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
-
-               print_running(avg_running, avg_enabled);
-               fputc('\n', output);
-               return;
-       }
-
        uval = avg * counter->scale;
-       printout(-1, 0, counter, uval);
-
-       print_noise(counter, avg);
-
-       print_running(avg_running, avg_enabled);
-       fprintf(output, "\n");
+       printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
+       if (!metric_only)
+               fprintf(output, "\n");
 }
 
 /*
@@ -972,39 +1243,78 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
                if (prefix)
                        fprintf(output, "%s", prefix);
 
-               if (run == 0 || ena == 0) {
-                       fprintf(output, "CPU%*d%s%*s%s",
-                               csv_output ? 0 : -4,
-                               perf_evsel__cpus(counter)->map[cpu], csv_sep,
-                               csv_output ? 0 : 18,
-                               counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-                               csv_sep);
+               uval = val * counter->scale;
+               printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
-                               fprintf(output, "%-*s%s",
-                                       csv_output ? 0 : unit_width,
-                                       counter->unit, csv_sep);
+               fputc('\n', output);
+       }
+}
 
-                               fprintf(output, "%*s",
-                                       csv_output ? 0 : -25,
-                                       perf_evsel__name(counter));
+static void print_no_aggr_metric(char *prefix)
+{
+       int cpu;
+       int nrcpus = 0;
+       struct perf_evsel *counter;
+       u64 ena, run, val;
+       double uval;
 
-                       if (counter->cgrp)
-                               fprintf(output, "%s%s",
-                                       csv_sep, counter->cgrp->name);
+       nrcpus = evsel_list->cpus->nr;
+       for (cpu = 0; cpu < nrcpus; cpu++) {
+               bool first = true;
 
-                       print_running(run, ena);
-                       fputc('\n', output);
-                       continue;
+               if (prefix)
+                       fputs(prefix, stat_config.output);
+               evlist__for_each(evsel_list, counter) {
+                       if (first) {
+                               aggr_printout(counter, cpu, 0);
+                               first = false;
+                       }
+                       val = perf_counts(counter->counts, cpu, 0)->val;
+                       ena = perf_counts(counter->counts, cpu, 0)->ena;
+                       run = perf_counts(counter->counts, cpu, 0)->run;
+
+                       uval = val * counter->scale;
+                       printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
                }
+               fputc('\n', stat_config.output);
+       }
+}
 
-               uval = val * counter->scale;
-               printout(cpu, 0, counter, uval);
-               if (!csv_output)
-                       print_noise(counter, 1.0);
-               print_running(run, ena);
+static int aggr_header_lens[] = {
+       [AGGR_CORE] = 18,
+       [AGGR_SOCKET] = 12,
+       [AGGR_NONE] = 6,
+       [AGGR_THREAD] = 24,
+       [AGGR_GLOBAL] = 0,
+};
 
-               fputc('\n', output);
+static void print_metric_headers(char *prefix)
+{
+       struct perf_stat_output_ctx out;
+       struct perf_evsel *counter;
+       struct outstate os = {
+               .fh = stat_config.output
+       };
+
+       if (prefix)
+               fprintf(stat_config.output, "%s", prefix);
+
+       if (!csv_output)
+               fprintf(stat_config.output, "%*s",
+                       aggr_header_lens[stat_config.aggr_mode], "");
+
+       /* Print metrics headers only */
+       evlist__for_each(evsel_list, counter) {
+               os.evsel = counter;
+               out.ctx = &os;
+               out.print_metric = print_metric_header;
+               out.new_line = new_line_metric;
+               perf_stat__print_shadow_stats(counter, 0,
+                                             0,
+                                             &out);
        }
+       fputc('\n', stat_config.output);
 }
 
 static void print_interval(char *prefix, struct timespec *ts)
@@ -1014,7 +1324,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 
        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-       if (num_print_interval == 0 && !csv_output) {
+       if (num_print_interval == 0 && !csv_output && !metric_only) {
                switch (stat_config.aggr_mode) {
                case AGGR_SOCKET:
                        fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
@@ -1101,6 +1411,17 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        else
                print_header(argc, argv);
 
+       if (metric_only) {
+               static int num_print_iv;
+
+               if (num_print_iv == 0)
+                       print_metric_headers(prefix);
+               if (num_print_iv++ == 25)
+                       num_print_iv = 0;
+               if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+                       fprintf(stat_config.output, "%s", prefix);
+       }
+
        switch (stat_config.aggr_mode) {
        case AGGR_CORE:
        case AGGR_SOCKET:
@@ -1113,10 +1434,16 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        case AGGR_GLOBAL:
                evlist__for_each(evsel_list, counter)
                        print_counter_aggr(counter, prefix);
+               if (metric_only)
+                       fputc('\n', stat_config.output);
                break;
        case AGGR_NONE:
-               evlist__for_each(evsel_list, counter)
-                       print_counter(counter, prefix);
+               if (metric_only)
+                       print_no_aggr_metric(prefix);
+               else {
+                       evlist__for_each(evsel_list, counter)
+                               print_counter(counter, prefix);
+               }
                break;
        case AGGR_UNSET:
        default:
@@ -1237,6 +1564,8 @@ static const struct option stat_options[] = {
                     "aggregate counts per thread", AGGR_THREAD),
        OPT_UINTEGER('D', "delay", &initial_delay,
                     "ms to wait before starting measurement after program start"),
+       OPT_BOOLEAN(0, "metric-only", &metric_only,
+                       "Only print computed metrics. No raw values"),
        OPT_END()
 };
 
@@ -1435,7 +1764,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
  */
 static int add_default_attributes(void)
 {
-       struct perf_event_attr default_attrs[] = {
+       struct perf_event_attr default_attrs0[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES       },
@@ -1443,8 +1772,14 @@ static int add_default_attributes(void)
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS            },
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES             },
+};
+       struct perf_event_attr frontend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND        },
+};
+       struct perf_event_attr backend_attrs[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+       struct perf_event_attr default_attrs1[] = {
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS           },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS    },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES          },
@@ -1561,7 +1896,19 @@ static int add_default_attributes(void)
        }
 
        if (!evsel_list->nr_entries) {
-               if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
+               if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+                       return -1;
+               if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+                       if (perf_evlist__add_default_attrs(evsel_list,
+                                               frontend_attrs) < 0)
+                               return -1;
+               }
+               if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+                       if (perf_evlist__add_default_attrs(evsel_list,
+                                               backend_attrs) < 0)
+                               return -1;
+               }
+               if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
                        return -1;
        }
 
@@ -1825,9 +2172,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
        if (evsel_list == NULL)
                return -ENOMEM;
 
+       parse_events__shrink_config_terms();
        argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
                                        (const char **) stat_usage,
                                        PARSE_OPT_STOP_AT_NON_OPTION);
+       perf_stat__init_shadow_stats();
 
        if (csv_sep) {
                csv_output = true;
@@ -1858,6 +2207,16 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out;
        }
 
+       if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+               fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+               goto out;
+       }
+
+       if (metric_only && run_count > 1) {
+               fprintf(stderr, "--metric-only is not supported with -r\n");
+               goto out;
+       }
+
        if (output_fd < 0) {
                fprintf(stderr, "argument to --log-fd must be a > 0\n");
                parse_options_usage(stat_usage, stat_options, "log-fd", 0);
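The printout() rewrite above routes all metric text through a perf_stat_output_ctx whose print_metric/new_line callbacks are chosen per output mode (standard, CSV, --metric-only), so perf_stat__print_shadow_stats() no longer needs to know how the result is rendered. Below is a simplified sketch of that indirection; the struct and function names are stand-ins for perf_stat_output_ctx and friends, not the real API.

/*
 * Sketch of the output-callback indirection: the metric computation calls
 * print_metric()/new_line() through a context, and the caller picks the
 * concrete implementation.
 */
#include <stdio.h>

struct output_ctx {
        void (*print_metric)(void *ctx, const char *fmt,
                             const char *unit, double val);
        void (*new_line)(void *ctx);
        void *ctx;
};

/* "Shadow stat" computation: knows the metric, not the output format. */
static void print_ipc(double instructions, double cycles,
                      struct output_ctx *out)
{
        out->print_metric(out->ctx, "%7.2f", "insn per cycle",
                          instructions / cycles);
        out->new_line(out->ctx);
}

static void metric_std(void *ctx, const char *fmt, const char *unit, double val)
{
        FILE *fh = ctx;

        fprintf(fh, " # ");
        fprintf(fh, fmt, val);
        fprintf(fh, " %s", unit);
}

static void nl_std(void *ctx)
{
        fputc('\n', (FILE *)ctx);
}

int main(void)
{
        struct output_ctx out = {
                .print_metric = metric_std,
                .new_line     = nl_std,
                .ctx          = stdout,
        };

        print_ipc(2.0e9, 1.5e9, &out);
        return 0;
}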
index bf01cbb0ef2369f2fc904809b86b70a823bdb604..94af190f6843d2c92e68b2dfef8dc51017c42079 100644 (file)
@@ -252,7 +252,8 @@ static void perf_top__print_sym_table(struct perf_top *top)
        char bf[160];
        int printed = 0;
        const int win_width = top->winsize.ws_col - 1;
-       struct hists *hists = evsel__hists(top->sym_evsel);
+       struct perf_evsel *evsel = top->sym_evsel;
+       struct hists *hists = evsel__hists(evsel);
 
        puts(CONSOLE_CLEAR);
 
@@ -288,7 +289,7 @@ static void perf_top__print_sym_table(struct perf_top *top)
        }
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        hists__output_recalc_col_len(hists, top->print_entries - printed);
        putchar('\n');
@@ -540,6 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 static void perf_top__sort_new_samples(void *arg)
 {
        struct perf_top *t = arg;
+       struct perf_evsel *evsel = t->sym_evsel;
        struct hists *hists;
 
        perf_top__reset_sample_counters(t);
@@ -547,7 +549,7 @@ static void perf_top__sort_new_samples(void *arg)
        if (t->evlist->selected != NULL)
                t->sym_evsel = t->evlist->selected;
 
-       hists = evsel__hists(t->sym_evsel);
+       hists = evsel__hists(evsel);
 
        if (t->evlist->enabled) {
                if (t->zero) {
@@ -559,7 +561,7 @@ static void perf_top__sort_new_samples(void *arg)
        }
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 }
 
 static void *display_thread_tui(void *arg)
@@ -1063,7 +1065,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
        return parse_callchain_top_opt(arg);
 }
 
-static int perf_top_config(const char *var, const char *value, void *cb)
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
        if (!strcmp(var, "top.call-graph"))
                var = "call-graph.record-mode"; /* fall-through */
@@ -1072,7 +1074,7 @@ static int perf_top_config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int
@@ -1212,6 +1214,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                     parse_branch_stack),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
+       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+                   "Show entries in a hierarchy"),
        OPT_END()
        };
        const char * const top_usage[] = {
@@ -1239,10 +1243,30 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out_delete_evlist;
        }
 
+       if (symbol_conf.report_hierarchy) {
+               /* disable incompatible options */
+               symbol_conf.event_group = false;
+               symbol_conf.cumulate_callchain = false;
+
+               if (field_order) {
+                       pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+                       parse_options_usage(top_usage, options, "fields", 0);
+                       parse_options_usage(NULL, options, "hierarchy", 0);
+                       goto out_delete_evlist;
+               }
+       }
+
        sort__mode = SORT_MODE__TOP;
        /* display thread wants entries to be collapsed in a different tree */
        sort__need_collapse = 1;
 
+       if (top.use_stdio)
+               use_browser = 0;
+       else if (top.use_tui)
+               use_browser = 1;
+
+       setup_browser(false);
+
        if (setup_sorting(top.evlist) < 0) {
                if (sort_order)
                        parse_options_usage(top_usage, options, "s", 1);
@@ -1252,13 +1276,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
                goto out_delete_evlist;
        }
 
-       if (top.use_stdio)
-               use_browser = 0;
-       else if (top.use_tui)
-               use_browser = 1;
-
-       setup_browser(false);
-
        status = target__validate(target);
        if (status) {
                target__strerror(target, status, errbuf, BUFSIZ);
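Both 'perf report' and 'perf top' gain the same --hierarchy guards above: incompatible features are switched off and an explicit field order is rejected outright. The sketch below shows that validation step with hypothetical flag variables standing in for perf's symbol_conf and option tables.

/*
 * Sketch of the hierarchy-mode option validation shared by report and top.
 */
#include <stdbool.h>
#include <stdio.h>

struct view_opts {
        bool hierarchy;          /* --hierarchy          */
        const char *field_order; /* -F/--fields, or NULL */
        bool event_group;
        bool cumulate_callchain;
};

static int validate_hierarchy(struct view_opts *o)
{
        if (!o->hierarchy)
                return 0;

        /* disable incompatible options */
        o->event_group = false;
        o->cumulate_callchain = false;

        if (o->field_order) {
                fprintf(stderr,
                        "Error: --hierarchy and --fields options cannot be used together\n");
                return -1;
        }
        return 0;
}

int main(void)
{
        struct view_opts o = { .hierarchy = true, .field_order = "overhead" };

        return validate_hierarchy(&o) ? 1 : 0;
}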
index 20916dd77aac24847bffbaa5d1c0fb56580d6e39..8dc98c598b1aed734ab6a6695c7308f3c16e0910 100644 (file)
@@ -33,6 +33,7 @@
 #include "util/stat.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
+#include "util/bpf-loader.h"
 
 #include <libaudit.h>
 #include <stdlib.h>
@@ -1724,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 
        sc->args = sc->tp_format->format.fields;
        sc->nr_args = sc->tp_format->format.nr_fields;
-       /* drop nr field - not relevant here; does not exist on older kernels */
-       if (sc->args && strcmp(sc->args->name, "nr") == 0) {
+       /*
+        * The first field is the syscall number, named '__syscall_nr' or
+        * 'nr' depending on the kernel version (and absent on some older
+        * kernels). It is not relevant here, so drop it when present.
+        */
+       if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
                sc->args = sc->args->next;
                --sc->nr_args;
        }
@@ -2177,6 +2182,37 @@ out_dump:
        return 0;
 }
 
+static void bpf_output__printer(enum binary_printer_ops op,
+                               unsigned int val, void *extra)
+{
+       FILE *output = extra;
+       unsigned char ch = (unsigned char)val;
+
+       switch (op) {
+       case BINARY_PRINT_CHAR_DATA:
+               fprintf(output, "%c", isprint(ch) ? ch : '.');
+               break;
+       case BINARY_PRINT_DATA_BEGIN:
+       case BINARY_PRINT_LINE_BEGIN:
+       case BINARY_PRINT_ADDR:
+       case BINARY_PRINT_NUM_DATA:
+       case BINARY_PRINT_NUM_PAD:
+       case BINARY_PRINT_SEP:
+       case BINARY_PRINT_CHAR_PAD:
+       case BINARY_PRINT_LINE_END:
+       case BINARY_PRINT_DATA_END:
+       default:
+               break;
+       }
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+                               struct perf_sample *sample)
+{
+       print_binary(sample->raw_data, sample->raw_size, 8,
+                    bpf_output__printer, trace->output);
+}
+
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                union perf_event *event __maybe_unused,
                                struct perf_sample *sample)
@@ -2189,7 +2225,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 
        fprintf(trace->output, "%s:", evsel->name);
 
-       if (evsel->tp_format) {
+       if (perf_evsel__is_bpf_output(evsel)) {
+               bpf_output__fprintf(trace, sample);
+       } else if (evsel->tp_format) {
                event_format__fprintf(evsel->tp_format, sample->cpu,
                                      sample->raw_data, sample->raw_size,
                                      trace->output);
@@ -2586,6 +2624,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (err < 0)
                goto out_error_open;
 
+       err = bpf__apply_obj_config();
+       if (err) {
+               char errbuf[BUFSIZ];
+
+               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+               pr_err("ERROR: Apply config to BPF failed: %s\n",
+                        errbuf);
+               goto out_error_open;
+       }
+
        /*
         * Better not use !target__has_task() here because we need to cover the
         * case where no threads were specified in the command line, but a
index 511141b102e8464fe3e102007f792220306ad3b9..eca6a912e8c22b0df342a7d955c6bad2db5ddf47 100644 (file)
@@ -61,50 +61,45 @@ endif
 
 ifeq ($(LIBUNWIND_LIBS),)
   NO_LIBUNWIND := 1
-else
-  #
-  # For linking with debug library, run like:
-  #
-  #   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-  #
-  ifdef LIBUNWIND_DIR
-    LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
-    LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
-  endif
-  LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
-
-  # Set per-feature check compilation flags
-  FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
-  FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 endif
+#
+# For linking with debug library, run like:
+#
+#   make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+ifdef LIBUNWIND_DIR
+  LIBUNWIND_CFLAGS  = -I$(LIBUNWIND_DIR)/include
+  LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+endif
+LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS)
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS)
 
 ifeq ($(NO_PERF_REGS),0)
   CFLAGS += -DHAVE_PERF_REGS_SUPPORT
 endif
 
-ifndef NO_LIBELF
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBDW_DIR=/opt/libdw/
-  ifdef LIBDW_DIR
-    LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
-    LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+  LIBDW_CFLAGS  := -I$(LIBDW_DIR)/include
+  LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
 
-ifdef LIBBABELTRACE
-  # for linking with debug library, run like:
-  # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
-  ifdef LIBBABELTRACE_DIR
-    LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
-    LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
-  endif
-  FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
-  FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+  LIBBABELTRACE_CFLAGS  := -I$(LIBBABELTRACE_DIR)/include
+  LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
 endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
 FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
 # include ARCH specific config
@@ -145,28 +140,26 @@ ifdef PARSER_DEBUG
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
-ifndef NO_LIBPYTHON
-  # Try different combinations to accommodate systems that only have
-  # python[2][-config] in weird combinations but always preferring
-  # python2 and python2-config as per pep-0394. If we catch a
-  # python[-config] in version 3, the version check will kill it.
-  PYTHON2 := $(if $(call get-executable,python2),python2,python)
-  override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
-  PYTHON2_CONFIG := \
-    $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
-  override PYTHON_CONFIG := \
-    $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations but always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+  $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+  $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
 
-  PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 
-  PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
-  FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
-  FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
-  FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
-endif
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
 
 CFLAGS += -fno-omit-frame-pointer
 CFLAGS += -ggdb3
@@ -335,6 +328,13 @@ ifndef NO_LIBELF
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
+ifdef PERF_HAVE_JITDUMP
+  ifndef NO_DWARF
+    $(call detected,CONFIG_JITDUMP)
+    CFLAGS += -DHAVE_JITDUMP
+  endif
+endif
+
 ifeq ($(ARCH),powerpc)
   ifndef NO_DWARF
     CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
@@ -411,6 +411,17 @@ ifndef NO_LIBAUDIT
   endif
 endif
 
+ifndef NO_LIBCRYPTO
+  ifneq ($(feature-libcrypto), 1)
+    msg := $(warning No libcrypto.h found, disabling jitted code injection, please install libssl-devel or libssl-dev);
+    NO_LIBCRYPTO := 1
+  else
+    CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+    EXTLIBS += -lcrypto
+    $(call detected,CONFIG_CRYPTO)
+  endif
+endif
+
 ifdef NO_NEWT
   NO_SLANG=1
 endif
diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile
new file mode 100644 (file)
index 0000000..5ce61a1
--- /dev/null
@@ -0,0 +1,89 @@
+ARCH=$(shell uname -m)
+
+ifeq ($(ARCH), x86_64)
+JARCH=amd64
+endif
+ifeq ($(ARCH), armv7l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), armv6l)
+JARCH=armhf
+endif
+ifeq ($(ARCH), aarch64)
+JARCH=aarch64
+endif
+ifeq ($(ARCH), ppc64)
+JARCH=powerpc
+endif
+ifeq ($(ARCH), ppc64le)
+JARCH=powerpc
+endif
+
+DESTDIR=/usr/local
+
+VERSION=1
+REVISION=0
+AGE=0
+
+LN=ln -sf
+RM=rm
+
+SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE)
+VLIBJVMTI=libjvmti.so.$(VERSION)
+SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI)
+SOLIBEXT=so
+
+# The following works at least on Fedora 23; other distros may need the
+# alternatives fallback below instead.
+ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+else
+  ifneq (,$(wildcard /usr/sbin/alternatives))
+    JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+  endif
+endif
+ifndef JDIR
+$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory)
+else
+  ifeq (,$(wildcard $(JDIR)/include/jvmti.h))
+  $(error the openjdk development package appears to be missing, install it and try again)
+  endif
+endif
+$(info Using Java from $(JDIR))
+# -lrt required in 32-bit mode for clock_gettime()
+LIBS=-lelf -lrt
+INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux
+
+TARGETS=$(SLIBJVMTI)
+
+SRCS=libjvmti.c jvmti_agent.c
+OBJS=$(SRCS:.c=.o)
+SOBJS=$(OBJS:.o=.lo)
+OPT=-O2 -g -Werror -Wall
+
+CFLAGS=$(INCDIR) $(OPT)
+
+all: $(TARGETS)
+
+.c.o:
+       $(CC) $(CFLAGS) -c $*.c
+.c.lo:
+       $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo
+
+$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h
+
+$(SLIBJVMTI):  $(SOBJS)
+       $(CC) $(CFLAGS) $(SLDFLAGS)  -o $@ $(SOBJS) $(LIBS)
+       $(LN) $@ libjvmti.$(SOLIBEXT)
+
+clean:
+       $(RM) -f *.o *.so.* *.so *.lo
+
+install:
+       -mkdir -p $(DESTDIR)/lib
+       install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/
+       (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI))
+       (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT))
+       ldconfig
+
+.SUFFIXES: .c .S .o .lo
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
new file mode 100644 (file)
index 0000000..6461e02
--- /dev/null
@@ -0,0 +1,465 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];
+static void *marker_addr;
+
+/*
+ * padding buffer
+ */
+static const char pad_bytes[7];
+
+static inline pid_t gettid(void)
+{
+       return (pid_t)syscall(__NR_gettid);
+}
+
+static int get_e_machine(struct jitheader *hdr)
+{
+       ssize_t sret;
+       char id[16];
+       int fd, ret = -1;
+       int m = -1;
+       struct {
+               uint16_t e_type;
+               uint16_t e_machine;
+       } info;
+
+       fd = open("/proc/self/exe", O_RDONLY);
+       if (fd == -1)
+               return -1;
+
+       sret = read(fd, id, sizeof(id));
+       if (sret != sizeof(id))
+               goto error;
+
+       /* check ELF signature */
+       if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F')
+               goto error;
+
+       sret = read(fd, &info, sizeof(info));
+       if (sret != sizeof(info))
+               goto error;
+
+       m = info.e_machine;
+       if (m < 0)
+               m = 0; /* ELF EM_NONE */
+
+       hdr->elf_mach = m;
+       ret = 0;
+error:
+       close(fd);
+       return ret;
+}
+
+#define NSEC_PER_SEC   1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+static inline uint64_t
+timespec_to_ns(const struct timespec *ts)
+{
+        return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline uint64_t
+perf_get_timestamp(void)
+{
+       struct timespec ts;
+       int ret;
+
+       ret = clock_gettime(perf_clk_id, &ts);
+       if (ret)
+               return 0;
+
+       return timespec_to_ns(&ts);
+}
+
+static int
+debug_cache_init(void)
+{
+       char str[32];
+       char *base, *p;
+       struct tm tm;
+       time_t t;
+       int ret;
+
+       time(&t);
+       localtime_r(&t, &tm);
+
+       base = getenv("JITDUMPDIR");
+       if (!base)
+               base = getenv("HOME");
+       if (!base)
+               base = ".";
+
+       strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
+
+       ret = mkdir(jit_path, 0755);
+       if (ret == -1) {
+               if (errno != EEXIST) {
+                       warn("jvmti: cannot create jit cache dir %s", jit_path);
+                       return -1;
+               }
+       }
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+       ret = mkdir(jit_path, 0755);
+       if (ret == -1) {
+               if (errno != EEXIST) {
+                       warn("cannot create jit cache dir %s", jit_path);
+                       return -1;
+               }
+       }
+
+       snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+
+       p = mkdtemp(jit_path);
+       if (p != jit_path) {
+               warn("cannot create jit cache dir %s", jit_path);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+perf_open_marker_file(int fd)
+{
+       long pgsz;
+
+       pgsz = sysconf(_SC_PAGESIZE);
+       if (pgsz == -1)
+               return -1;
+
+       /*
+        * We mmap the jitdump file to create an MMAP RECORD in the perf.data
+        * file. The mmap is captured either live (perf record running when we
+        * mmap) or in deferred mode, via /proc/PID/maps.
+        * The MMAP record is used as a marker of a jitdump file, carrying more
+        * metadata about the jitted code. perf report/annotate detect this
+        * special filename and process the jitdump file.
+        *
+        * The mapping must be PROT_EXEC to ensure it is captured by perf
+        * record even when not using the -d option.
+        */
+       marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+       return (marker_addr == MAP_FAILED) ? -1 : 0;
+}
+
+static void
+perf_close_marker_file(void)
+{
+       long pgsz;
+
+       if (!marker_addr)
+               return;
+
+       pgsz = sysconf(_SC_PAGESIZE);
+       if (pgsz == -1)
+               return;
+
+       munmap(marker_addr, pgsz);
+}
+
+void *jvmti_open(void)
+{
+       int pad_cnt;
+       char dump_path[PATH_MAX];
+       struct jitheader header;
+       int fd;
+       FILE *fp;
+
+       /*
+        * check if clockid is supported
+        */
+       if (!perf_get_timestamp())
+               warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+
+       memset(&header, 0, sizeof(header));
+
+       debug_cache_init();
+
+       /*
+        * jitdump file name
+        */
+       snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+
+       fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+       if (fd == -1)
+               return NULL;
+
+       /*
+        * create the perf.data marker for the jitdump file
+        */
+       if (perf_open_marker_file(fd)) {
+               warnx("jvmti: failed to create marker file");
+               return NULL;
+       }
+
+       fp = fdopen(fd, "w+");
+       if (!fp) {
+               warn("jvmti: cannot create %s", dump_path);
+               close(fd);
+               return NULL;
+       }
+
+       warnx("jvmti: jitdump in %s", dump_path);
+
+       if (get_e_machine(&header)) {
+               warn("get_e_machine failed\n");
+               goto error;
+       }
+
+       header.magic      = JITHEADER_MAGIC;
+       header.version    = JITHEADER_VERSION;
+       header.total_size = sizeof(header);
+       header.pid        = getpid();
+
+       /* calculate amount of padding '\0' */
+       pad_cnt = PADDING_8ALIGNED(header.total_size);
+       header.total_size += pad_cnt;
+
+       header.timestamp = perf_get_timestamp();
+
+       if (!fwrite(&header, sizeof(header), 1, fp)) {
+               warn("jvmti: cannot write dumpfile header");
+               goto error;
+       }
+
+       /* write padding '\0' if necessary */
+       if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) {
+               warn("jvmti: cannot write dumpfile header padding");
+               goto error;
+       }
+
+       return fp;
+error:
+       fclose(fp);
+       return NULL;
+}
+
+int
+jvmti_close(void *agent)
+{
+       struct jr_code_close rec;
+       FILE *fp = agent;
+
+       if (!fp) {
+               warnx("jvmti: incalid fd in close_agent");
+               return -1;
+       }
+
+       rec.p.id = JIT_CODE_CLOSE;
+       rec.p.total_size = sizeof(rec);
+
+       rec.p.timestamp = perf_get_timestamp();
+
+       if (!fwrite(&rec, sizeof(rec), 1, fp))
+               return -1;
+
+       fclose(fp);
+
+       fp = NULL;
+
+       perf_close_marker_file();
+
+       return 0;
+}
+
+int
+jvmti_write_code(void *agent, char const *sym,
+       uint64_t vma, void const *code, unsigned int const size)
+{
+       static int code_generation = 1;
+       struct jr_code_load rec;
+       size_t sym_len;
+       size_t padding_count;
+       FILE *fp = agent;
+       int ret = -1;
+
+       /* don't care about 0 length function, no samples */
+       if (size == 0)
+               return 0;
+
+       if (!fp) {
+               warnx("jvmti: invalid fd in write_native_code");
+               return -1;
+       }
+
+       sym_len = strlen(sym) + 1;
+
+       rec.p.id           = JIT_CODE_LOAD;
+       rec.p.total_size   = sizeof(rec) + sym_len;
+       padding_count      = PADDING_8ALIGNED(rec.p.total_size);
+       rec.p.total_size  += padding_count;
+       rec.p.timestamp    = perf_get_timestamp();
+
+       rec.code_size  = size;
+       rec.vma        = vma;
+       rec.code_addr  = vma;
+       rec.pid        = getpid();
+       rec.tid        = gettid();
+
+       if (code)
+               rec.p.total_size += size;
+
+       /*
+        * If the JVM is multi-threaded, multiple concurrent calls to the agent
+        * may be possible, so protect file writes.
+        */
+       flockfile(fp);
+
+       /*
+        * get code index inside lock to avoid race condition
+        */
+       rec.code_index = code_generation++;
+
+       ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+       fwrite_unlocked(sym, sym_len, 1, fp);
+
+       if (padding_count)
+               fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+
+       if (code)
+               fwrite_unlocked(code, size, 1, fp);
+
+       funlockfile(fp);
+
+       ret = 0;
+
+       return ret;
+}
+
+int
+jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
+                      jvmti_line_info_t *li, int nr_lines)
+{
+       struct jr_code_debug_info rec;
+       size_t sret, len, size, flen;
+       size_t padding_count;
+       uint64_t addr;
+       const char *fn = file;
+       FILE *fp = agent;
+       int i;
+
+       /*
+        * no entry to write
+        */
+       if (!nr_lines)
+               return 0;
+
+       if (!fp) {
+               warnx("jvmti: invalid fd in write_debug_info");
+               return -1;
+       }
+
+       flen = strlen(file) + 1;
+
+       rec.p.id        = JIT_CODE_DEBUG_INFO;
+       size            = sizeof(rec);
+       rec.p.timestamp = perf_get_timestamp();
+       rec.code_addr   = (uint64_t)(uintptr_t)code;
+       rec.nr_entry    = nr_lines;
+
+       /*
+        * on disk source line info layout:
+        * uint64_t : addr
+        * int      : line number
+        * int      : column discriminator
+        * file[]   : source file name
+        * padding  : pad to multiple of 8 bytes
+        */
+       size += nr_lines * sizeof(struct debug_entry);
+       size += flen * nr_lines;
+       /*
+        * pad to 8 bytes
+        */
+       padding_count = PADDING_8ALIGNED(size);
+
+       rec.p.total_size = size + padding_count;
+
+       /*
+        * If the JVM is multi-threaded, multiple concurrent calls to the agent
+        * may be possible, so protect file writes.
+        */
+       flockfile(fp);
+
+       sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+       if (sret != 1)
+               goto error;
+
+       for (i = 0; i < nr_lines; i++) {
+
+               addr = (uint64_t)li[i].pc;
+               len  = sizeof(addr);
+               sret = fwrite_unlocked(&addr, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               len  = sizeof(li[0].line_number);
+               sret = fwrite_unlocked(&li[i].line_number, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               len  = sizeof(li[0].discrim);
+               sret = fwrite_unlocked(&li[i].discrim, len, 1, fp);
+               if (sret != 1)
+                       goto error;
+
+               sret = fwrite_unlocked(fn, flen, 1, fp);
+               if (sret != 1)
+                       goto error;
+       }
+       if (padding_count) {
+               sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp);
+               if (sret != 1)
+                       goto error;
+       }
+
+       funlockfile(fp);
+       return 0;
+error:
+       funlockfile(fp);
+       return -1;
+}
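
Record sizes in the writers above are rounded up to a multiple of 8 bytes with PADDING_8ALIGNED() from ../util/jitdump.h, which is not part of this hunk. The following is only a hypothetical stand-in sketching the arithmetic that macro is assumed to perform, not the patch's actual definition:

/* Illustration only -- the real macro lives in tools/perf/util/jitdump.h. */
#include <stdio.h>

#define PADDING_8ALIGNED(sz)    ((8 - ((sz) & 7)) & 7)

int main(void)
{
        /* e.g. some record header plus a 13-byte symbol name */
        unsigned int total = 56 + 13;
        unsigned int pad   = PADDING_8ALIGNED(total);

        /* prints: size 69, pad 3, aligned 72 */
        printf("size %u, pad %u, aligned %u\n", total, pad, total + pad);
        return 0;
}
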
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
new file mode 100644 (file)
index 0000000..bedf5d0
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#define __unused __attribute__((unused))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct {
+       unsigned long   pc;
+       int             line_number;
+       int             discrim; /* discriminator -- 0 for now */
+} jvmti_line_info_t;
+
+void *jvmti_open(void);
+int   jvmti_close(void *agent);
+int   jvmti_write_code(void *agent, char const *symbol_name,
+                      uint64_t vma, void const *code,
+                      const unsigned int code_size);
+
+int   jvmti_write_debug_info(void *agent,
+                            uint64_t code,
+                            const char *file,
+                            jvmti_line_info_t *li,
+                            int nr_lines);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_AGENT_H__ */
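
A minimal sketch (not part of the patch) of how a caller might drive the API declared above: open the jitdump handle, emit one code-load record, then close. The symbol string and code buffer are placeholders, and compiling this requires the JDK's jvmti.h on the include path since jvmti_agent.h pulls it in.

#include <stdint.h>
#include <stdio.h>
#include "jvmti_agent.h"

static int emit_one_record(const void *code, unsigned int size, uint64_t vma)
{
        /* creates $HOME/.debug/jit/<dir>/jit-<pid>.dump (or under $JITDUMPDIR) */
        void *agent = jvmti_open();

        if (!agent)
                return -1;

        if (jvmti_write_code(agent, "Lcom/example/Demo;run()V", vma, code, size))
                fprintf(stderr, "jvmti_write_code failed\n");

        return jvmti_close(agent);
}
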
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
new file mode 100644 (file)
index 0000000..ac12e4b
--- /dev/null
@@ -0,0 +1,304 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+static int has_line_numbers;
+void *jvmti_agent;
+
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+                   jvmti_line_info_t *tab, jint *nr)
+{
+       jint i, lines = 0;
+       jint nr_lines = 0;
+       jvmtiLineNumberEntry *loc_tab = NULL;
+       jvmtiError ret;
+
+       ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
+       if (ret != JVMTI_ERROR_NONE)
+               return ret;
+
+       for (i = 0; i < nr_lines; i++) {
+               if (loc_tab[i].start_location < bci) {
+                       tab[lines].pc = (unsigned long)pc;
+                       tab[lines].line_number = loc_tab[i].line_number;
+                       tab[lines].discrim = 0; /* not yet used */
+                       lines++;
+               } else {
+                       break;
+               }
+       }
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
+       *nr = lines;
+       return JVMTI_ERROR_NONE;
+}
+
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+       const jvmtiCompiledMethodLoadRecordHeader *hdr;
+       jvmtiCompiledMethodLoadInlineRecord *rec;
+       jvmtiLineNumberEntry *lne = NULL;
+       PCStackInfo *c;
+       jint nr, ret;
+       int nr_total = 0;
+       int i, lines_total = 0;
+
+       if (!(tab && nr_lines))
+               return JVMTI_ERROR_NULL_POINTER;
+
+       /*
+        * Phase 1 -- get the number of lines necessary
+        */
+       for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+               if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+                       rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+                       for (i = 0; i < rec->numpcs; i++) {
+                               c = rec->pcinfo + i;
+                               nr = 0;
+                               /*
+                                * Unfortunately, we have to fetch the whole table just to get the number of lines!
+                                */
+                               ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+                               if (ret == JVMTI_ERROR_NONE) {
+                                       /* free what was allocated for nothing */
+                                       (*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+                                       nr_total += (int)nr;
+                               }
+                       }
+               }
+       }
+
+       if (nr_total == 0)
+               return JVMTI_ERROR_NOT_FOUND;
+
+       /*
+        * Phase 2 -- allocate big enough line table
+        */
+       *tab = malloc(nr_total * sizeof(**tab));
+       if (!*tab)
+               return JVMTI_ERROR_OUT_OF_MEMORY;
+
+       for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+               if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+                       rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+                       for (i = 0; i < rec->numpcs; i++) {
+                               c = rec->pcinfo + i;
+                               nr = 0;
+                               ret = do_get_line_numbers(jvmti, c->pc,
+                                                         c->methods[0],
+                                                         c->bcis[0],
+                                                         *tab + lines_total,
+                                                         &nr);
+                               if (ret == JVMTI_ERROR_NONE)
+                                       lines_total += nr;
+                       }
+               }
+       }
+       *nr_lines = lines_total;
+       return JVMTI_ERROR_NONE;
+}
+
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+                       jmethodID method,
+                       jint code_size,
+                       void const *code_addr,
+                       jint map_length,
+                       jvmtiAddrLocationMap const *map,
+                       const void *compile_info)
+{
+       jvmti_line_info_t *line_tab = NULL;
+       jclass decl_class;
+       char *class_sign = NULL;
+       char *func_name = NULL;
+       char *func_sign = NULL;
+       char *file_name = NULL;
+       char fn[PATH_MAX];
+       uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+       jvmtiError ret;
+       int nr_lines = 0; /* in line_tab[] */
+       size_t len;
+
+       ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+                                               &decl_class);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot get declaring class");
+               return;
+       }
+
+       if (has_line_numbers && map && map_length) {
+               ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+               if (ret != JVMTI_ERROR_NONE) {
+                       warnx("jvmti: cannot get line table for method");
+                       nr_lines = 0;
+               }
+       }
+
+       ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot get source filename ret=%d", ret);
+               goto error;
+       }
+
+       ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+                                         &class_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: getclassignature failed");
+               goto error;
+       }
+
+       ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+                                     &func_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: failed getmethodname");
+               goto error;
+       }
+
+       /*
+        * Assume the path name follows the class hierarchy; this is common practice with Java programs.
+        */
+       if (*class_sign == 'L') {
+               int j, i = 0;
+               char *p = strrchr(class_sign, '/');
+               if (p) {
+                       /* drop the 'L' prefix and copy up to the final '/' */
+                       for (i = 0; i < (p - class_sign); i++)
+                               fn[i] = class_sign[i+1];
+               }
+               /*
+                * Append the file name. We use loops and not string ops to avoid
+                * modifying class_sign, which is used later for the symbol name.
+                */
+               for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
+                       fn[i] = file_name[j];
+               fn[i] = '\0';
+       } else {
+               /* fallback case */
+               strcpy(fn, file_name);
+       }
+       /*
+        * write source line info record if we have it
+        */
+       if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
+               warnx("jvmti: write_debug_info() failed");
+
+       len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+       {
+               char str[len];
+               snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+               if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+                       warnx("jvmti: write_code() failed");
+       }
+error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+       free(line_tab);
+}
+
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+                 char const *name,
+                 void const *code_addr,
+                 jint code_size)
+{
+       uint64_t addr = (uint64_t)(unsigned long)code_addr;
+       int ret;
+
+       ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size);
+       if (ret)
+               warnx("jvmti: write_code() failed for code_generated");
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused)
+{
+       jvmtiEventCallbacks cb;
+       jvmtiCapabilities caps1;
+       jvmtiJlocationFormat format;
+       jvmtiEnv *jvmti = NULL;
+       jint ret;
+
+       jvmti_agent = jvmti_open();
+       if (!jvmti_agent) {
+               warnx("jvmti: open_agent failed");
+               return -1;
+       }
+
+       /*
+        * Request a JVMTI interface version 1 environment
+        */
+       ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+       if (ret != JNI_OK) {
+               warnx("jvmti: jvmti version 1 not supported");
+               return -1;
+       }
+
+       /*
+        * acquire method_load capability, we require it
+        * request line numbers (optional)
+        */
+       memset(&caps1, 0, sizeof(caps1));
+       caps1.can_generate_compiled_method_load_events = 1;
+
+       ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: acquire compiled_method capability failed");
+               return -1;
+       }
+       ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+       if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+               memset(&caps1, 0, sizeof(caps1));
+               caps1.can_get_line_numbers = 1;
+               caps1.can_get_source_file_name = 1;
+               ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+               if (ret == JVMTI_ERROR_NONE)
+                       has_line_numbers = 1;
+       }
+
+       memset(&cb, 0, sizeof(cb));
+
+       cb.CompiledMethodLoad   = compiled_method_load_cb;
+       cb.DynamicCodeGenerated = code_generated_cb;
+
+       ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: cannot set event callbacks");
+               return -1;
+       }
+
+       ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+                       JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: setnotification failed for method_load");
+               return -1;
+       }
+
+       ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+                       JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               warnx("jvmti: setnotification failed on code_generated");
+               return -1;
+       }
+       return 0;
+}
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __unused)
+{
+       int ret;
+
+       ret = jvmti_close(jvmti_agent);
+       if (ret)
+               errx(1, "Error: op_close_agent()");
+}
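
For reference, a standalone sketch (not part of the patch) of the path construction performed in compiled_method_load_cb(): drop the leading 'L' of the JVM class signature, keep everything up to and including the last '/', then append the file name returned by GetSourceFileName().

#include <limits.h>
#include <stdio.h>
#include <string.h>

static void class_sign_to_path(const char *class_sign, const char *file_name,
                               char *fn, size_t sz)
{
        const char *p = strrchr(class_sign, '/');
        int n = 0;

        /* copy "java/util/" out of "Ljava/util/HashMap;" */
        if (*class_sign == 'L' && p)
                n = snprintf(fn, sz, "%.*s", (int)(p - class_sign), class_sign + 1);
        snprintf(fn + n, sz - n, "%s", file_name);
}

int main(void)
{
        char fn[PATH_MAX];

        class_sign_to_path("Ljava/util/HashMap;", "HashMap.java", fn, sizeof(fn));
        printf("%s\n", fn);     /* java/util/HashMap.java */
        return 0;
}
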
index a929618b8eb616f90c9bb8e26bad5416a0fdbc19..aaee0a7827477810c5c0d2d82753545592d7f22f 100644 (file)
@@ -454,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv)
 
 static void execv_dashed_external(const char **argv)
 {
-       struct strbuf cmd = STRBUF_INIT;
+       char *cmd;
        const char *tmp;
        int status;
 
-       strbuf_addf(&cmd, "perf-%s", argv[0]);
+       if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+               goto do_die;
 
        /*
         * argv[0] must be the perf command, but the argv array
@@ -467,7 +468,7 @@ static void execv_dashed_external(const char **argv)
         * restore it on error.
         */
        tmp = argv[0];
-       argv[0] = cmd.buf;
+       argv[0] = cmd;
 
        /*
         * if we fail because the command is not found, it is
@@ -475,15 +476,16 @@ static void execv_dashed_external(const char **argv)
         */
        status = run_command_v_opt(argv, 0);
        if (status != -ERR_RUN_COMMAND_EXEC) {
-               if (IS_RUN_COMMAND_ERR(status))
+               if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
                        die("unable to run '%s'", argv[0]);
+               }
                exit(-status);
        }
        errno = ENOENT; /* as if we called execvp */
 
        argv[0] = tmp;
-
-       strbuf_release(&cmd);
+       zfree(&cmd);
 }
 
 static int run_argv(int *argcp, const char ***argv)
@@ -546,6 +548,8 @@ int main(int argc, const char **argv)
 
        srandom(time(NULL));
 
+       perf_config(perf_default_config, NULL);
+
        /* get debugfs/tracefs mount point from /proc/mounts */
        tracing_path_mount();
 
@@ -613,6 +617,8 @@ int main(int argc, const char **argv)
         */
        pthread__block_sigwinch();
 
+       perf_debug_setup();
+
        while (1) {
                static int done_help;
                int was_alias = run_argv(&argc, &argv);
index 90129accffbe824e37d9831ba3b323011295f78a..5381a01c0610c0e61f079140ed5cdc2df3f87b0d 100644 (file)
@@ -58,6 +58,8 @@ struct record_opts {
        bool         full_auxtrace;
        bool         auxtrace_snapshot_mode;
        bool         record_switch_events;
+       bool         all_kernel;
+       bool         all_user;
        unsigned int freq;
        unsigned int mmap_pages;
        unsigned int auxtrace_mmap_pages;
index 15c8400240fd9029ae34fca077304337d9c75ca6..1d95009592eb3a8a8d053e1340a3e42dd412a68c 100644 (file)
@@ -71,7 +71,10 @@ try:
 except:
        if not audit_package_warned:
                audit_package_warned = True
-               print "Install the audit-libs-python package to get syscall names"
+               print "Install the audit-libs-python package to get syscall names.\n" \
+                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
+                    "\n  # yum install audit-libs-python (Fedora)" \
+                    "\n  etc.\n"
 
 def syscall_name(id):
        try:
index bf016c439fbd10a3cada60253c98d60ce441aa98..8cc30e731c739495f3eb61a0da9a76246ad35600 100644 (file)
@@ -1,3 +1,4 @@
 llvm-src-base.c
 llvm-src-kbuild.c
 llvm-src-prologue.c
+llvm-src-relocation.c
index 614899b88b377e07f9615d618ba7045f66051bea..1ba628ed049adbafc27c7b8900ecb838165a2aa7 100644 (file)
@@ -31,7 +31,7 @@ perf-y += sample-parsing.o
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
 perf-y += bpf.o
 perf-y += topology.o
 perf-y += cpumap.o
@@ -59,6 +59,13 @@ $(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
        $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
        $(Q)echo ';' >> $@
 
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+       $(call rule_mkdir)
+       $(Q)echo '#include <tests/llvm.h>' > $@
+       $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+       $(Q)echo ';' >> $@
+
 ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
index fb80c9eb6a95b67947b23adaeca69b5e57bac704..e7664fe3bd33739fd92be2579c30102e481e8f03 100644 (file)
 
 static int fd1;
 static int fd2;
+static int fd3;
 static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use ASM to ensure watchpoint and breakpoint can be triggered
+ * at one instruction.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+       ".globl __test_function\n"
+       "__test_function:\n"
+       "incq (%rdi)\n"
+       "ret\n");
+#elif defined (__aarch64__)
+extern void __test_function(volatile long *ptr);
+asm (
+       ".globl __test_function\n"
+       "__test_function:\n"
+       "str x30, [x0]\n"
+       "ret\n");
+
+#else
+static void __test_function(volatile long *ptr)
+{
+       *ptr = 0x1234;
+}
+#endif
 
 __attribute__ ((noinline))
 static int test_function(void)
 {
+       __test_function(&the_var);
+       the_var++;
        return time(NULL);
 }
 
+static void sig_handler_2(int signum __maybe_unused,
+                         siginfo_t *oh __maybe_unused,
+                         void *uc __maybe_unused)
+{
+       overflows_2++;
+       if (overflows_2 > 10) {
+               ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+       }
+}
+
 static void sig_handler(int signum __maybe_unused,
                        siginfo_t *oh __maybe_unused,
                        void *uc __maybe_unused)
@@ -54,10 +99,11 @@ static void sig_handler(int signum __maybe_unused,
                 */
                ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
                ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+               ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
        }
 }
 
-static int bp_event(void *fn, int setup_signal)
+static int __event(bool is_x, void *addr, int sig)
 {
        struct perf_event_attr pe;
        int fd;
@@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal)
        pe.size = sizeof(struct perf_event_attr);
 
        pe.config = 0;
-       pe.bp_type = HW_BREAKPOINT_X;
-       pe.bp_addr = (unsigned long) fn;
+       pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+       pe.bp_addr = (unsigned long) addr;
        pe.bp_len = sizeof(long);
 
        pe.sample_period = 1;
@@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal)
                return TEST_FAIL;
        }
 
-       if (setup_signal) {
-               fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
-               fcntl(fd, F_SETSIG, SIGIO);
-               fcntl(fd, F_SETOWN, getpid());
-       }
+       fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+       fcntl(fd, F_SETSIG, sig);
+       fcntl(fd, F_SETOWN, getpid());
 
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 
        return fd;
 }
 
+static int bp_event(void *addr, int sig)
+{
+       return __event(true, addr, sig);
+}
+
+static int wp_event(void *addr, int sig)
+{
+       return __event(false, addr, sig);
+}
+
 static long long bp_count(int fd)
 {
        long long count;
@@ -114,7 +168,7 @@ static long long bp_count(int fd)
 int test__bp_signal(int subtest __maybe_unused)
 {
        struct sigaction sa;
-       long long count1, count2;
+       long long count1, count2, count3;
 
        /* setup SIGIO signal handler */
        memset(&sa, 0, sizeof(struct sigaction));
@@ -126,21 +180,52 @@ int test__bp_signal(int subtest __maybe_unused)
                return TEST_FAIL;
        }
 
+       sa.sa_sigaction = (void *) sig_handler_2;
+       if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+               pr_debug("failed setting up signal handler 2\n");
+               return TEST_FAIL;
+       }
+
        /*
         * We create following events:
         *
-        * fd1 - breakpoint event on test_function with SIGIO
+        * fd1 - breakpoint event on __test_function with SIGIO
         *       signal configured. We should get signal
         *       notification each time the breakpoint is hit
         *
-        * fd2 - breakpoint event on sig_handler without SIGIO
+        * fd2 - breakpoint event on sig_handler with SIGUSR1
+        *       configured. We should get SIGUSR1 each time when
+        *       breakpoint is hit
+        *
+        * fd3 - watchpoint event on __test_function with SIGIO
         *       configured.
         *
         * Following processing should happen:
-        *   - execute test_function
-        *   - fd1 event breakpoint hit -> count1 == 1
-        *   - SIGIO is delivered       -> overflows == 1
-        *   - fd2 event breakpoint hit -> count2 == 1
+        *   Exec:               Action:                       Result:
+        *   incq (%rdi)       - fd1 event breakpoint hit   -> count1 == 1
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 1
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 1  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows = 1
+        *   sys_rt_sigreturn  - return from sig_handler
+        *   incq (%rdi)       - fd3 event watchpoint hit   -> count3 == 1       (wp and bp in one insn)
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 2
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 2  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows = 2
+        *   sys_rt_sigreturn  - return from sig_handler
+        *   the_var++         - fd3 event watchpoint hit   -> count3 == 2       (standalone watchpoint)
+        *                     - SIGIO is delivered
+        *   sig_handler       - fd2 event breakpoint hit   -> count2 == 3
+        *                     - SIGUSR1 is delivered
+        *   sig_handler_2                                  -> overflows_2 == 3  (nested signal)
+        *   sys_rt_sigreturn  - return from sig_handler_2
+        *   overflows++                                    -> overflows == 3
+        *   sys_rt_sigreturn  - return from sig_handler
         *
         * The test case check following error conditions:
         * - we get stuck in signal handler because of debug
@@ -152,11 +237,13 @@ int test__bp_signal(int subtest __maybe_unused)
         *
         */
 
-       fd1 = bp_event(test_function, 1);
-       fd2 = bp_event(sig_handler, 0);
+       fd1 = bp_event(__test_function, SIGIO);
+       fd2 = bp_event(sig_handler, SIGUSR1);
+       fd3 = wp_event((void *)&the_var, SIGIO);
 
        ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
        ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+       ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
 
        /*
         * Kick off the test by triggering 'fd1'
@@ -166,15 +253,18 @@ int test__bp_signal(int subtest __maybe_unused)
 
        ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
        ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+       ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
 
        count1 = bp_count(fd1);
        count2 = bp_count(fd2);
+       count3 = bp_count(fd3);
 
        close(fd1);
        close(fd2);
+       close(fd3);
 
-       pr_debug("count1 %lld, count2 %lld, overflow %d\n",
-                count1, count2, overflows);
+       pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+                count1, count2, count3, overflows, overflows_2);
 
        if (count1 != 1) {
                if (count1 == 11)
@@ -183,12 +273,18 @@ int test__bp_signal(int subtest __maybe_unused)
                        pr_debug("failed: wrong count for bp1%lld\n", count1);
        }
 
-       if (overflows != 1)
+       if (overflows != 3)
                pr_debug("failed: wrong overflow hit\n");
 
-       if (count2 != 1)
+       if (overflows_2 != 3)
+               pr_debug("failed: wrong overflow_2 hit\n");
+
+       if (count2 != 3)
                pr_debug("failed: wrong count for bp2\n");
 
-       return count1 == 1 && overflows == 1 && count2 == 1 ?
+       if (count3 != 2)
+               pr_debug("failed: wrong count for bp3\n");
+
+       return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
                TEST_OK : TEST_FAIL;
 }
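
The __event() helper above only fills in the perf_event_attr; the file descriptor itself is obtained in the elided lines, presumably through perf's own sys_perf_event_open() wrapper. For readers outside the perf tree, a sketch of the equivalent raw syscall (illustration only):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

static int raw_perf_event_open(struct perf_event_attr *attr)
{
        /* pid 0 = this process, cpu -1 = any CPU, no group fd, no flags */
        return syscall(__NR_perf_event_open, attr, 0, -1, -1, 0);
}
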
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
new file mode 100644 (file)
index 0000000..93af774
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+       (void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+       (void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+       unsigned int type;
+       unsigned int key_size;
+       unsigned int value_size;
+       unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") my_table = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+       int key = 0;
+       int value = 0;
+
+       /*
+        * Incorrect relocation: this program should not be allowed to
+        * load into the kernel.
+        */
+       bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+       return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
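
For contrast, a sketch of what a loadable variant would look like, reusing the definitions above: the update must target the map declared in the "maps" section so the loader has a real map to relocate against, rather than an ordinary global such as this_is_a_global_val.

SEC("func=sys_write")
int bpf_func__sys_write_ok(void *ctx)
{
        int key = 0, value = 0;

        /* Correct: the relocation resolves to my_table's map fd. */
        bpf_map_update_elem(&my_table, &key, &value, BPF_ANY);
        return 0;
}
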
index 33689a0cf821e5b9608e54b981e3280ee580890c..199501c71e272491850065910aae5003603ab10e 100644 (file)
@@ -1,7 +1,11 @@
 #include <stdio.h>
 #include <sys/epoll.h>
+#include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <bpf/bpf.h>
 #include "tests.h"
 #include "llvm.h"
 #include "debug.h"
@@ -71,6 +75,15 @@ static struct {
                (NR_ITERS + 1) / 4,
        },
 #endif
+       {
+               LLVM_TESTCASE_BPF_RELOCATION,
+               "Test BPF relocation checker",
+               "[bpf_relocation_test]",
+               "fix 'perf test LLVM' first",
+               "libbpf error when dealing with relocation",
+               NULL,
+               0,
+       },
 };
 
 static int do_test(struct bpf_object *obj, int (*func)(void),
@@ -99,7 +112,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        parse_evlist.error = &parse_error;
        INIT_LIST_HEAD(&parse_evlist.list);
 
-       err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj);
+       err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL);
        if (err || list_empty(&parse_evlist.list)) {
                pr_debug("Failed to add events selected by BPF\n");
                return TEST_FAIL;
@@ -190,7 +203,7 @@ static int __test__bpf(int idx)
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
                                       bpf_testcase_table[idx].prog_id,
-                                      true);
+                                      true, NULL);
        if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
                pr_debug("Unable to get BPF object, %s\n",
                         bpf_testcase_table[idx].msg_compile_fail);
@@ -202,14 +215,21 @@ static int __test__bpf(int idx)
 
        obj = prepare_bpf(obj_buf, obj_buf_sz,
                          bpf_testcase_table[idx].name);
-       if (!obj) {
+       if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+               if (!obj)
+                       pr_debug("Fail to load BPF object: %s\n",
+                                bpf_testcase_table[idx].msg_load_fail);
+               else
+                       pr_debug("Success unexpectedly: %s\n",
+                                bpf_testcase_table[idx].msg_load_fail);
                ret = TEST_FAIL;
                goto out;
        }
 
-       ret = do_test(obj,
-                     bpf_testcase_table[idx].target_func,
-                     bpf_testcase_table[idx].expect_result);
+       if (obj)
+               ret = do_test(obj,
+                             bpf_testcase_table[idx].target_func,
+                             bpf_testcase_table[idx].expect_result);
 out:
        bpf__clear();
        return ret;
@@ -227,6 +247,36 @@ const char *test__bpf_subtest_get_desc(int i)
        return bpf_testcase_table[i].desc;
 }
 
+static int check_env(void)
+{
+       int err;
+       unsigned int kver_int;
+       char license[] = "GPL";
+
+       struct bpf_insn insns[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 1),
+               BPF_EXIT_INSN(),
+       };
+
+       err = fetch_kernel_version(&kver_int, NULL, 0);
+       if (err) {
+               pr_debug("Unable to get kernel version\n");
+               return err;
+       }
+
+       err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+                              sizeof(insns) / sizeof(insns[0]),
+                              license, kver_int, NULL, 0);
+       if (err < 0) {
+               pr_err("Missing basic BPF support, skip this test: %s\n",
+                      strerror(errno));
+               return err;
+       }
+       close(err);
+
+       return 0;
+}
+
 int test__bpf(int i)
 {
        int err;
@@ -239,6 +289,9 @@ int test__bpf(int i)
                return TEST_SKIP;
        }
 
+       if (check_env())
+               return TEST_SKIP;
+
        err = __test__bpf(i);
        return err;
 }
index 313a48c6b2bc8e111e113e79c4a72fdc2d720ac5..afc9ad0a0515c5db77745d0d0c6964d950fa3bfc 100644 (file)
@@ -439,7 +439,7 @@ static int do_test_code_reading(bool try_kcore)
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
-               .freq                = 4000,
+               .freq                = 500,
                .target              = {
                        .uses_mmap   = true,
                },
@@ -559,7 +559,13 @@ static int do_test_code_reading(bool try_kcore)
                                evlist = NULL;
                                continue;
                        }
-                       pr_debug("perf_evlist__open failed\n");
+
+                       if (verbose) {
+                               char errbuf[512];
+                               perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+                               pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+                       }
+
                        goto out_put;
                }
                break;
index 5e6a86e50fb97aae648c16ce627157e8377ebb02..ecf136c385d5ff2f21111e813a9ce1e30c03365a 100644 (file)
@@ -191,7 +191,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
         * function since TEST_ASSERT_VAL() returns in case of failure.
         */
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(hists_to_evsel(hists), NULL);
 
        if (verbose > 2) {
                pr_info("use callchain: %d, cumulate callchain: %d\n",
index 351a42463444a3df9f80daea3848499b8d39659a..34b945a55d4d2864cc561f36275191f57f84210a 100644 (file)
@@ -145,7 +145,7 @@ int test__hists_filter(int subtest __maybe_unused)
                struct hists *hists = evsel__hists(evsel);
 
                hists__collapse_resort(hists, NULL);
-               hists__output_resort(hists, NULL);
+               perf_evsel__output_resort(evsel, NULL);
 
                if (verbose > 2) {
                        pr_info("Normal histogram\n");
index b231265148d89a28e39387d423711c643351eb70..23cce67c7e48902b86c00cc6e56df22ef16386e4 100644 (file)
@@ -156,7 +156,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -256,7 +256,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -310,7 +310,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -388,7 +388,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -491,7 +491,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
                goto out;
 
        hists__collapse_resort(hists, NULL);
-       hists__output_resort(hists, NULL);
+       perf_evsel__output_resort(evsel, NULL);
 
        if (verbose > 2) {
                pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
index 06f45c1d42561df030e35d55078d70d910e79998..cff564fb4b66761f7f7fdcd5ee10727ad3b08726 100644 (file)
@@ -6,12 +6,6 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_config_cb(const char *var, const char *val,
-                         void *arg __maybe_unused)
-{
-       return perf_default_config(var, val, arg);
-}
-
 #ifdef HAVE_LIBBPF_SUPPORT
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
@@ -35,6 +29,7 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused,
 static struct {
        const char *source;
        const char *desc;
+       bool should_load_fail;
 } bpf_source_table[__LLVM_TESTCASE_MAX] = {
        [LLVM_TESTCASE_BASE] = {
                .source = test_llvm__bpf_base_prog,
@@ -48,14 +43,19 @@ static struct {
                .source = test_llvm__bpf_test_prologue_prog,
                .desc = "Compile source for BPF prologue generation test",
        },
+       [LLVM_TESTCASE_BPF_RELOCATION] = {
+               .source = test_llvm__bpf_test_relocation,
+               .desc = "Compile source for BPF relocation test",
+               .should_load_fail = true,
+       },
 };
 
-
 int
 test_llvm__fetch_bpf_obj(void **p_obj_buf,
                         size_t *p_obj_buf_sz,
                         enum test_llvm__testcase idx,
-                        bool force)
+                        bool force,
+                        bool *should_load_fail)
 {
        const char *source;
        const char *desc;
@@ -68,8 +68,8 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
 
        source = bpf_source_table[idx].source;
        desc = bpf_source_table[idx].desc;
-
-       perf_config(perf_config_cb, NULL);
+       if (should_load_fail)
+               *should_load_fail = bpf_source_table[idx].should_load_fail;
 
        /*
         * Skip this test if user's .perfconfig doesn't set [llvm] section
@@ -136,14 +136,15 @@ int test__llvm(int subtest)
        int ret;
        void *obj_buf = NULL;
        size_t obj_buf_sz = 0;
+       bool should_load_fail = false;
 
        if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
                return TEST_FAIL;
 
        ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-                                      subtest, false);
+                                      subtest, false, &should_load_fail);
 
-       if (ret == TEST_OK) {
+       if (ret == TEST_OK && !should_load_fail) {
                ret = test__bpf_parsing(obj_buf, obj_buf_sz);
                if (ret != TEST_OK) {
                        pr_debug("Failed to parse test case '%s'\n",
index 5150b4d6ef50afe357f5f86300f4d39d28bec658..0eaa604be99defec6dcf3b09ee9a75eb348096fb 100644 (file)
@@ -7,14 +7,17 @@
 extern const char test_llvm__bpf_base_prog[];
 extern const char test_llvm__bpf_test_kbuild_prog[];
 extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
 
 enum test_llvm__testcase {
        LLVM_TESTCASE_BASE,
        LLVM_TESTCASE_KBUILD,
        LLVM_TESTCASE_BPF_PROLOGUE,
+       LLVM_TESTCASE_BPF_RELOCATION,
        __LLVM_TESTCASE_MAX,
 };
 
 int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
-                            enum test_llvm__testcase index, bool force);
+                            enum test_llvm__testcase index, bool force,
+                            bool *should_load_fail);
 #endif
index f918015512af96d1173891dac2e8f460ff65ca85..cac15d93aea656f96ad449cba1f9529e42afcbec 100644 (file)
@@ -15,6 +15,7 @@ else
 PERF := .
 PERF_O := $(PERF)
 O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
 
 ifneq ($(O),)
   FULL_O := $(shell readlink -f $(O) || echo $(O))
@@ -79,6 +80,7 @@ make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf     := NO_LIBBPF=1
+make_no_libcrypto   := NO_LIBCRYPTO=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -102,6 +104,7 @@ make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal        += NO_LIBCRYPTO=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -110,6 +113,9 @@ run := make_pure
 # disable features detection
 ifeq ($(MK),Makefile)
 run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
 endif
 run += make_python_perf_so
 run += make_debug
@@ -260,6 +266,8 @@ run := $(shell shuf -e $(run))
 run_O := $(shell shuf -e $(run_O))
 endif
 
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
 ifdef DEBUG
 d := $(info run   $(run))
 d := $(info run_O $(run_O))
@@ -267,13 +275,13 @@ endif
 
 MAKEFLAGS := --no-print-directory
 
-clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null)
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null)
 
 $(run):
        $(call clean)
        @TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \
-       echo "- $@: $$cmd" && echo $$cmd > $@ && \
+       cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+       printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1; \
        echo "  test: $(call test,$@)" >> $@ 2>&1; \
        $(call test,$@) && \
@@ -283,8 +291,8 @@ $(run_O):
        $(call clean)
        @TMP_O=$$(mktemp -d); \
        TMP_DEST=$$(mktemp -d); \
-       cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \
-       echo "- $@: $$cmd" && echo $$cmd > $@ && \
+       cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+       printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
        ( eval $$cmd ) >> $@ 2>&1 && \
        echo "  test: $(call test_O,$@)" >> $@ 2>&1; \
        $(call test_O,$@) && \
@@ -313,11 +321,43 @@ make_kernelsrc_tools:
        (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
        test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
 
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
+
 all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
        @echo OK
+       @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
 
 out: $(run_O)
        @echo OK
+       @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+       $(call clean)
+       @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+       echo "- $@: $$cmd" && echo $$cmd && \
+       ( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+       $(call clean)
+       @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+       echo "- $@: $$cmd" && echo $$cmd && \
+       ( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+       $(t): $(if $(findstring make_static,$(t)),\
+               $(FEATURES_DUMP_FILE_STATIC),\
+               $(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1  --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+                       $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+                       $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
 
 .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
 endif # ifndef MK
index abe8849d1d7030bda2ae65f770424e3c9ad84330..7865f68dc0d82bea12c960c61792e9fd28857305 100644 (file)
@@ -1271,6 +1271,38 @@ static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist)
        return 0;
 }
 
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
+       return 0;
+}
+
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+       TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
+       return 0;
+}
+
 static int count_tracepoints(void)
 {
        struct dirent *events_ent;
@@ -1579,6 +1611,26 @@ static struct evlist_test test__events[] = {
                .check = test__checkevent_precise_max_modifier,
                .id    = 47,
        },
+       {
+               .name  = "instructions/name=insn/",
+               .check = test__checkevent_config_symbol,
+               .id    = 48,
+       },
+       {
+               .name  = "r1234/name=rawpmu/",
+               .check = test__checkevent_config_raw,
+               .id    = 49,
+       },
+       {
+               .name  = "4:0x6530160/name=numpmu/",
+               .check = test__checkevent_config_num,
+               .id    = 50,
+       },
+       {
+               .name  = "L1-dcache-misses/name=cachepmu/",
+               .check = test__checkevent_config_cache,
+               .id    = 51,
+       },
 };
 
 static struct evlist_test test__events_pmu[] = {
@@ -1666,7 +1718,7 @@ static int test_term(struct terms_test *t)
        }
 
        ret = t->check(&terms);
-       parse_events__free_terms(&terms);
+       parse_events_terms__purge(&terms);
 
        return ret;
 }
index f0bfc9e8fd9f617d69c0b3cb36fe6c210ebbd6d5..630b0b409b973f87ba00312e5e4e3d8fdf829f32 100644 (file)
@@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
         */
        for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
                struct symbol *pair, *first_pair;
-               bool backwards = true;
 
                sym  = rb_entry(nd, struct symbol, rb_node);
 
@@ -151,27 +150,14 @@ next_pair:
                                continue;
 
                        } else {
-                               struct rb_node *nnd;
-detour:
-                               nnd = backwards ? rb_prev(&pair->rb_node) :
-                                                 rb_next(&pair->rb_node);
-                               if (nnd) {
-                                       struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
-
-                                       if (UM(next->start) == mem_start) {
-                                               pair = next;
+                               pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+                               if (pair) {
+                                       if (UM(pair->start) == mem_start)
                                                goto next_pair;
-                                       }
-                               }
 
-                               if (backwards) {
-                                       backwards = false;
-                                       pair = first_pair;
-                                       goto detour;
+                                       pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+                                                mem_start, sym->name, pair->name);
                                }
-
-                               pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
-                                        mem_start, sym->name, pair->name);
                        }
                } else
                        pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
index d37202121689a017b0790d552e088d695716c527..af68a9d488bfce964c84e67cf5394e7e13daab29 100644 (file)
@@ -531,8 +531,8 @@ static struct ui_browser_colorset {
                .bg       = "yellow",
        },
        {
-               .colorset = HE_COLORSET_CODE,
-               .name     = "code",
+               .colorset = HE_COLORSET_JUMP_ARROWS,
+               .name     = "jump_arrows",
                .fg       = "blue",
                .bg       = "default",
        },
index 01781de59532ce9c9fd1ff7f8c7fdf97d45fa105..be3b70eb5fca6e402830570c4111f10258702c04 100644 (file)
@@ -7,7 +7,7 @@
 #define HE_COLORSET_MEDIUM     51
 #define HE_COLORSET_NORMAL     52
 #define HE_COLORSET_SELECTED   53
-#define HE_COLORSET_CODE       54
+#define HE_COLORSET_JUMP_ARROWS        54
 #define HE_COLORSET_ADDR       55
 #define HE_COLORSET_ROOT       56
 
index 718bd46d47fa7bc88674a192dd1a8e8ab1fb7ca9..4fc208e82c6fc7b28d99af147d1c75738bc338e6 100644 (file)
@@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
                to = (u64)btarget->idx;
        }
 
-       ui_browser__set_color(browser, HE_COLORSET_CODE);
+       ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
        __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width,
                                 from, to);
 }
index 08c09ad755d2d2f8dd55be06b258e16fb2c8335f..4b98165559462025ded4db4a41fe8b5e66a9f151 100644 (file)
@@ -32,6 +32,7 @@ struct hist_browser {
        bool                 show_headers;
        float                min_pcnt;
        u64                  nr_non_filtered_entries;
+       u64                  nr_hierarchy_entries;
        u64                  nr_callchain_rows;
 };
 
@@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser)
 
        for (nd = rb_first(&hists->entries);
             (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-            nd = rb_next(nd)) {
+            nd = rb_hierarchy_next(nd)) {
                struct hist_entry *he =
                        rb_entry(nd, struct hist_entry, rb_node);
 
-               if (he->unfolded)
+               if (he->leaf && he->unfolded)
                        unfolded_rows += he->nr_rows;
        }
        return unfolded_rows;
@@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb)
 {
        u32 nr_entries;
 
-       if (hist_browser__has_filter(hb))
+       if (symbol_conf.report_hierarchy)
+               nr_entries = hb->nr_hierarchy_entries;
+       else if (hist_browser__has_filter(hb))
                nr_entries = hb->nr_non_filtered_entries;
        else
                nr_entries = hb->hists->nr_entries;
@@ -247,6 +250,38 @@ static int callchain__count_rows(struct rb_root *chain)
        return n;
 }
 
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+                               bool include_children)
+{
+       int count = 0;
+       struct rb_node *node;
+       struct hist_entry *child;
+
+       if (he->leaf)
+               return callchain__count_rows(&he->sorted_chain);
+
+       if (he->has_no_entry)
+               return 1;
+
+       node = rb_first(&he->hroot_out);
+       while (node) {
+               float percent;
+
+               child = rb_entry(node, struct hist_entry, rb_node);
+               percent = hist_entry__get_percent_limit(child);
+
+               if (!child->filtered && percent >= hb->min_pcnt) {
+                       count++;
+
+                       if (include_children && child->unfolded)
+                               count += hierarchy_count_rows(hb, child, true);
+               }
+
+               node = rb_next(node);
+       }
+       return count;
+}
+
 static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
        if (!he)
@@ -326,11 +361,17 @@ static void callchain__init_have_children(struct rb_root *root)
 
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
-       if (!he->init_have_children) {
+       if (he->init_have_children)
+               return;
+
+       if (he->leaf) {
                he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
                callchain__init_have_children(&he->sorted_chain);
-               he->init_have_children = true;
+       } else {
+               he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
        }
+
+       he->init_have_children = true;
 }
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
@@ -349,17 +390,49 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser)
                has_children = callchain_list__toggle_fold(cl);
 
        if (has_children) {
+               int child_rows = 0;
+
                hist_entry__init_have_children(he);
                browser->b.nr_entries -= he->nr_rows;
-               browser->nr_callchain_rows -= he->nr_rows;
 
-               if (he->unfolded)
-                       he->nr_rows = callchain__count_rows(&he->sorted_chain);
+               if (he->leaf)
+                       browser->nr_callchain_rows -= he->nr_rows;
                else
+                       browser->nr_hierarchy_entries -= he->nr_rows;
+
+               if (symbol_conf.report_hierarchy)
+                       child_rows = hierarchy_count_rows(browser, he, true);
+
+               if (he->unfolded) {
+                       if (he->leaf)
+                               he->nr_rows = callchain__count_rows(&he->sorted_chain);
+                       else
+                               he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+                       /* account for grandchildren */
+                       if (symbol_conf.report_hierarchy)
+                               browser->b.nr_entries += child_rows - he->nr_rows;
+
+                       if (!he->leaf && he->nr_rows == 0) {
+                               he->has_no_entry = true;
+                               he->nr_rows = 1;
+                       }
+               } else {
+                       if (symbol_conf.report_hierarchy)
+                               browser->b.nr_entries -= child_rows - he->nr_rows;
+
+                       if (he->has_no_entry)
+                               he->has_no_entry = false;
+
                        he->nr_rows = 0;
+               }
 
                browser->b.nr_entries += he->nr_rows;
-               browser->nr_callchain_rows += he->nr_rows;
+
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else
+                       browser->nr_hierarchy_entries += he->nr_rows;
 
                return true;
        }
@@ -422,13 +495,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
        return n;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+                                bool unfold __maybe_unused)
+{
+       float percent;
+       struct rb_node *nd;
+       struct hist_entry *child;
+       int n = 0;
+
+       for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+               child = rb_entry(nd, struct hist_entry, rb_node);
+               percent = hist_entry__get_percent_limit(child);
+               if (!child->filtered && percent >= hb->min_pcnt)
+                       n++;
+       }
+
+       return n;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+                                   struct hist_browser *hb, bool unfold)
 {
        hist_entry__init_have_children(he);
        he->unfolded = unfold ? he->has_children : false;
 
        if (he->has_children) {
-               int n = callchain__set_folding(&he->sorted_chain, unfold);
+               int n;
+
+               if (he->leaf)
+                       n = callchain__set_folding(&he->sorted_chain, unfold);
+               else
+                       n = hierarchy_set_folding(hb, he, unfold);
+
                he->nr_rows = unfold ? n : 0;
        } else
                he->nr_rows = 0;
@@ -438,19 +536,38 @@ static void
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
        struct rb_node *nd;
-       struct hists *hists = browser->hists;
+       struct hist_entry *he;
+       double percent;
 
-       for (nd = rb_first(&hists->entries);
-            (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
-            nd = rb_next(nd)) {
-               struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
-               hist_entry__set_folding(he, unfold);
-               browser->nr_callchain_rows += he->nr_rows;
+       nd = rb_first(&browser->hists->entries);
+       while (nd) {
+               he = rb_entry(nd, struct hist_entry, rb_node);
+
+               /* set folding state even if it's currently folded */
+               nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+               hist_entry__set_folding(he, browser, unfold);
+
+               percent = hist_entry__get_percent_limit(he);
+               if (he->filtered || percent < browser->min_pcnt)
+                       continue;
+
+               if (!he->depth || unfold)
+                       browser->nr_hierarchy_entries++;
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+                       browser->nr_hierarchy_entries++;
+                       he->has_no_entry = true;
+                       he->nr_rows = 1;
+               } else
+                       he->has_no_entry = false;
        }
 }
 
 static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
+       browser->nr_hierarchy_entries = 0;
        browser->nr_callchain_rows = 0;
        __hist_browser__set_folding(browser, unfold);
 
@@ -657,9 +774,24 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
        return 1;
 }
 
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+       struct callchain_node *child;
+
+       if (node == NULL)
+               return false;
+
+       if (rb_next(node))
+               return true;
+
+       child = rb_entry(node, struct callchain_node, rb_node);
+       return callchain_cumul_hits(child) != parent_total;
+}
+
 static int hist_browser__show_callchain_flat(struct hist_browser *browser,
                                             struct rb_root *root,
                                             unsigned short row, u64 total,
+                                            u64 parent_total,
                                             print_callchain_entry_fn print,
                                             struct callchain_print_arg *arg,
                                             check_output_full_fn is_output_full)
@@ -669,7 +801,7 @@ static int hist_browser__show_callchain_flat(struct hist_browser *browser,
        bool need_percent;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -763,6 +895,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
 static int hist_browser__show_callchain_folded(struct hist_browser *browser,
                                               struct rb_root *root,
                                               unsigned short row, u64 total,
+                                              u64 parent_total,
                                               print_callchain_entry_fn print,
                                               struct callchain_print_arg *arg,
                                               check_output_full_fn is_output_full)
@@ -772,7 +905,7 @@ static int hist_browser__show_callchain_folded(struct hist_browser *browser,
        bool need_percent;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -844,20 +977,24 @@ next:
        return row - first_row;
 }
 
-static int hist_browser__show_callchain(struct hist_browser *browser,
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
                                        struct rb_root *root, int level,
                                        unsigned short row, u64 total,
+                                       u64 parent_total,
                                        print_callchain_entry_fn print,
                                        struct callchain_print_arg *arg,
                                        check_output_full_fn is_output_full)
 {
        struct rb_node *node;
        int first_row = row, offset = level * LEVEL_OFFSET_STEP;
-       u64 new_total;
        bool need_percent;
+       u64 percent_total = total;
+
+       if (callchain_param.mode == CHAIN_GRAPH_REL)
+               percent_total = parent_total;
 
        node = rb_first(root);
-       need_percent = node && rb_next(node);
+       need_percent = check_percent_display(node, parent_total);
 
        while (node) {
                struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
@@ -878,7 +1015,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
                        folded_sign = callchain_list__folded(chain);
 
                        row += hist_browser__show_callchain_list(browser, child,
-                                                       chain, row, total,
+                                                       chain, row, percent_total,
                                                        was_first && need_percent,
                                                        offset + extra_offset,
                                                        print, arg);
@@ -893,13 +1030,9 @@ static int hist_browser__show_callchain(struct hist_browser *browser,
                if (folded_sign == '-') {
                        const int new_level = level + (extra_offset ? 2 : 1);
 
-                       if (callchain_param.mode == CHAIN_GRAPH_REL)
-                               new_total = child->children_hit;
-                       else
-                               new_total = total;
-
-                       row += hist_browser__show_callchain(browser, &child->rb_root,
-                                                           new_level, row, new_total,
+                       row += hist_browser__show_callchain_graph(browser, &child->rb_root,
+                                                           new_level, row, total,
+                                                           child->children_hit,
                                                            print, arg, is_output_full);
                }
                if (is_output_full(browser, row))
@@ -910,6 +1043,45 @@ out:
        return row - first_row;
 }
 
+static int hist_browser__show_callchain(struct hist_browser *browser,
+                                       struct hist_entry *entry, int level,
+                                       unsigned short row,
+                                       print_callchain_entry_fn print,
+                                       struct callchain_print_arg *arg,
+                                       check_output_full_fn is_output_full)
+{
+       u64 total = hists__total_period(entry->hists);
+       u64 parent_total;
+       int printed;
+
+       if (symbol_conf.cumulate_callchain)
+               parent_total = entry->stat_acc->period;
+       else
+               parent_total = entry->stat.period;
+
+       if (callchain_param.mode == CHAIN_FLAT) {
+               printed = hist_browser__show_callchain_flat(browser,
+                                               &entry->sorted_chain, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       } else if (callchain_param.mode == CHAIN_FOLDED) {
+               printed = hist_browser__show_callchain_folded(browser,
+                                               &entry->sorted_chain, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       } else {
+               printed = hist_browser__show_callchain_graph(browser,
+                                               &entry->sorted_chain, level, row,
+                                               total, parent_total, print, arg,
+                                               is_output_full);
+       }
+
+       if (arg->is_current_entry)
+               browser->he_selection = entry;
+
+       return printed;
+}
+
 struct hpp_arg {
        struct ui_browser *b;
        char folded_sign;
@@ -1006,7 +1178,6 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                                    struct hist_entry *entry,
                                    unsigned short row)
 {
-       char s[256];
        int printed = 0;
        int width = browser->b.width;
        char folded_sign = ' ';
@@ -1031,16 +1202,18 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                        .folded_sign    = folded_sign,
                        .current_entry  = current_entry,
                };
-               struct perf_hpp hpp = {
-                       .buf            = s,
-                       .size           = sizeof(s),
-                       .ptr            = &arg,
-               };
                int column = 0;
 
                hist_browser__gotorc(browser, row, 0);
 
-               perf_hpp__for_each_format(fmt) {
+               hists__for_each_format(browser->hists, fmt) {
+                       char s[2048];
+                       struct perf_hpp hpp = {
+                               .buf    = s,
+                               .size   = sizeof(s),
+                               .ptr    = &arg,
+                       };
+
                        if (perf_hpp__should_skip(fmt, entry->hists) ||
                            column++ < browser->b.horiz_scroll)
                                continue;
@@ -1065,11 +1238,18 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                        }
 
                        if (fmt->color) {
-                               width -= fmt->color(fmt, &hpp, entry);
+                               int ret = fmt->color(fmt, &hpp, entry);
+                               hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                               /*
+                                * fmt->color() already used ui_browser to
+                                * print the non-alignment bits, skip it (+ret):
+                                */
+                               ui_browser__printf(&browser->b, "%s", s + ret);
                        } else {
-                               width -= fmt->entry(fmt, &hpp, entry);
+                               hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
                                ui_browser__printf(&browser->b, "%s", s);
                        }
+                       width -= hpp.buf - s;
                }
 
                /* The scroll bar isn't being used */
@@ -1084,43 +1264,246 @@ static int hist_browser__show_entry(struct hist_browser *browser,
                --row_offset;
 
        if (folded_sign == '-' && row != browser->b.rows) {
-               u64 total = hists__total_period(entry->hists);
                struct callchain_print_arg arg = {
                        .row_offset = row_offset,
                        .is_current_entry = current_entry,
                };
 
-               if (callchain_param.mode == CHAIN_GRAPH_REL) {
-                       if (symbol_conf.cumulate_callchain)
-                               total = entry->stat_acc->period;
-                       else
-                               total = entry->stat.period;
-               }
-
-               if (callchain_param.mode == CHAIN_FLAT) {
-                       printed += hist_browser__show_callchain_flat(browser,
-                                       &entry->sorted_chain, row, total,
-                                       hist_browser__show_callchain_entry, &arg,
-                                       hist_browser__check_output_full);
-               } else if (callchain_param.mode == CHAIN_FOLDED) {
-                       printed += hist_browser__show_callchain_folded(browser,
-                                       &entry->sorted_chain, row, total,
+               printed += hist_browser__show_callchain(browser, entry, 1, row,
                                        hist_browser__show_callchain_entry, &arg,
                                        hist_browser__check_output_full);
+       }
+
+       return printed;
+}
+
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+                                             struct hist_entry *entry,
+                                             unsigned short row,
+                                             int level)
+{
+       int printed = 0;
+       int width = browser->b.width;
+       char folded_sign = ' ';
+       bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+       off_t row_offset = entry->row_offset;
+       bool first = true;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       struct hpp_arg arg = {
+               .b              = &browser->b,
+               .current_entry  = current_entry,
+       };
+       int column = 0;
+       int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+       if (current_entry) {
+               browser->he_selection = entry;
+               browser->selection = &entry->ms;
+       }
+
+       hist_entry__init_have_children(entry);
+       folded_sign = hist_entry__folded(entry);
+       arg.folded_sign = folded_sign;
+
+       if (entry->leaf && row_offset) {
+               row_offset--;
+               goto show_callchain;
+       }
+
+       hist_browser__gotorc(browser, row, 0);
+
+       if (current_entry && browser->b.navkeypressed)
+               ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+       else
+               ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+       ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+       width -= level * HIERARCHY_INDENT;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&entry->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               char s[2048];
+               struct perf_hpp hpp = {
+                       .buf            = s,
+                       .size           = sizeof(s),
+                       .ptr            = &arg,
+               };
+
+               if (perf_hpp__should_skip(fmt, entry->hists) ||
+                   column++ < browser->b.horiz_scroll)
+                       continue;
+
+               if (current_entry && browser->b.navkeypressed) {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_SELECTED);
                } else {
-                       printed += hist_browser__show_callchain(browser,
-                                       &entry->sorted_chain, 1, row, total,
-                                       hist_browser__show_callchain_entry, &arg,
-                                       hist_browser__check_output_full);
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_NORMAL);
+               }
+
+               if (first) {
+                       ui_browser__printf(&browser->b, "%c", folded_sign);
+                       width--;
+                       first = false;
+               } else {
+                       ui_browser__printf(&browser->b, "  ");
+                       width -= 2;
+               }
+
+               if (fmt->color) {
+                       int ret = fmt->color(fmt, &hpp, entry);
+                       hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                       /*
+                        * fmt->color() already used ui_browser to
+                        * print the non-alignment bits, skip it (+ret):
+                        */
+                       ui_browser__printf(&browser->b, "%s", s + ret);
+               } else {
+                       int ret = fmt->entry(fmt, &hpp, entry);
+                       hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+                       ui_browser__printf(&browser->b, "%s", s);
+               }
+               width -= hpp.buf - s;
+       }
+
+       ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+       width -= hierarchy_indent;
+
+       if (column >= browser->b.horiz_scroll) {
+               char s[2048];
+               struct perf_hpp hpp = {
+                       .buf            = s,
+                       .size           = sizeof(s),
+                       .ptr            = &arg,
+               };
+
+               if (current_entry && browser->b.navkeypressed) {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_SELECTED);
+               } else {
+                       ui_browser__set_color(&browser->b,
+                                             HE_COLORSET_NORMAL);
                }
 
-               if (arg.is_current_entry)
-                       browser->he_selection = entry;
+               perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+                       ui_browser__write_nstring(&browser->b, "", 2);
+                       width -= 2;
+
+                       /*
+                        * No need to call hist_entry__snprintf_alignment()
+                        * since this fmt is always the last column in the
+                        * hierarchy mode.
+                        */
+                       if (fmt->color) {
+                               width -= fmt->color(fmt, &hpp, entry);
+                       } else {
+                               int i = 0;
+
+                               width -= fmt->entry(fmt, &hpp, entry);
+                               ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+                               while (isspace(s[i++]))
+                                       width++;
+                       }
+               }
+       }
+
+       /* The scroll bar isn't being used */
+       if (!browser->b.navkeypressed)
+               width += 1;
+
+       ui_browser__write_nstring(&browser->b, "", width);
+
+       ++row;
+       ++printed;
+
+show_callchain:
+       if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+               struct callchain_print_arg carg = {
+                       .row_offset = row_offset,
+               };
+
+               printed += hist_browser__show_callchain(browser, entry,
+                                       level + 1, row,
+                                       hist_browser__show_callchain_entry, &carg,
+                                       hist_browser__check_output_full);
        }
 
        return printed;
 }
 
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+                                      unsigned short row, int level)
+{
+       int width = browser->b.width;
+       bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+       bool first = true;
+       int column = 0;
+       int ret;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       int indent = browser->hists->nr_hpp_node - 2;
+
+       if (current_entry) {
+               browser->he_selection = NULL;
+               browser->selection = NULL;
+       }
+
+       hist_browser__gotorc(browser, row, 0);
+
+       if (current_entry && browser->b.navkeypressed)
+               ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+       else
+               ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+       ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+       width -= level * HIERARCHY_INDENT;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&browser->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (perf_hpp__should_skip(fmt, browser->hists) ||
+                   column++ < browser->b.horiz_scroll)
+                       continue;
+
+               ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists));
+
+               if (first) {
+                       /* for folded sign */
+                       first = false;
+                       ret++;
+               } else {
+                       /* space between columns */
+                       ret += 2;
+               }
+
+               ui_browser__write_nstring(&browser->b, "", ret);
+               width -= ret;
+       }
+
+       ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+       width -= indent * HIERARCHY_INDENT;
+
+       if (column >= browser->b.horiz_scroll) {
+               char buf[32];
+
+               ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+               ui_browser__printf(&browser->b, "  %s", buf);
+               width -= ret + 2;
+       }
+
+       /* The scroll bar isn't being used */
+       if (!browser->b.navkeypressed)
+               width += 1;
+
+       ui_browser__write_nstring(&browser->b, "", width);
+       return 1;
+}
+
 static int advance_hpp_check(struct perf_hpp *hpp, int inc)
 {
        advance_hpp(hpp, inc);
@@ -1144,7 +1527,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
                        return ret;
        }
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists)  || column++ < browser->b.horiz_scroll)
                        continue;
 
@@ -1160,11 +1543,96 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *
        return ret;
 }
 
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+       struct hists *hists = browser->hists;
+       struct perf_hpp dummy_hpp = {
+               .buf    = buf,
+               .size   = size,
+       };
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       size_t ret = 0;
+       int column = 0;
+       int indent = hists->nr_hpp_node - 2;
+       bool first_node, first_col;
+
+       ret = scnprintf(buf, size, " ");
+       if (advance_hpp_check(&dummy_hpp, ret))
+               return ret;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (column++ < browser->b.horiz_scroll)
+                       continue;
+
+               ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+               if (advance_hpp_check(&dummy_hpp, ret))
+                       break;
+
+               ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
+               if (advance_hpp_check(&dummy_hpp, ret))
+                       break;
+       }
+
+       ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+                       indent * HIERARCHY_INDENT, "");
+       if (advance_hpp_check(&dummy_hpp, ret))
+               return ret;
+
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node) {
+                       ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+                       if (advance_hpp_check(&dummy_hpp, ret))
+                               break;
+               }
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       char *start;
+
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col) {
+                               ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+                               if (advance_hpp_check(&dummy_hpp, ret))
+                                       break;
+                       }
+                       first_col = false;
+
+                       ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
+                       dummy_hpp.buf[ret] = '\0';
+                       rtrim(dummy_hpp.buf);
+
+                       start = ltrim(dummy_hpp.buf);
+                       ret = strlen(start);
+
+                       if (start != dummy_hpp.buf)
+                               memmove(dummy_hpp.buf, start, ret + 1);
+
+                       if (advance_hpp_check(&dummy_hpp, ret))
+                               break;
+               }
+       }
+
+       return ret;
+}
+
 static void hist_browser__show_headers(struct hist_browser *browser)
 {
        char headers[1024];
 
-       hists_browser__scnprintf_headers(browser, headers, sizeof(headers));
+       if (symbol_conf.report_hierarchy)
+               hists_browser__scnprintf_hierarchy_headers(browser, headers,
+                                                          sizeof(headers));
+       else
+               hists_browser__scnprintf_headers(browser, headers,
+                                                sizeof(headers));
        ui_browser__gotorc(&browser->b, 0, 0);
        ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
        ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
@@ -1196,18 +1664,34 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
        hb->he_selection = NULL;
        hb->selection = NULL;
 
-       for (nd = browser->top; nd; nd = rb_next(nd)) {
+       for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                float percent;
 
-               if (h->filtered)
+               if (h->filtered) {
+                       /* let it move to sibling */
+                       h->unfolded = false;
                        continue;
+               }
 
                percent = hist_entry__get_percent_limit(h);
                if (percent < hb->min_pcnt)
                        continue;
 
-               row += hist_browser__show_entry(hb, h, row);
+               if (symbol_conf.report_hierarchy) {
+                       row += hist_browser__show_hierarchy_entry(hb, h, row,
+                                                                 h->depth);
+                       if (row == browser->rows)
+                               break;
+
+                       if (h->has_no_entry) {
+                               hist_browser__show_no_entry(hb, row, h->depth + 1);
+                               row++;
+                       }
+               } else {
+                       row += hist_browser__show_entry(hb, h, row);
+               }
+
                if (row == browser->rows)
                        break;
        }
@@ -1225,7 +1709,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd,
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
 
-               nd = rb_next(nd);
+               /*
+        * If it's filtered, all of its children were also filtered.
+        * So move to the sibling node.
+                */
+               if (rb_next(nd))
+                       nd = rb_next(nd);
+               else
+                       nd = rb_hierarchy_next(nd);
        }
 
        return NULL;
@@ -1241,7 +1732,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
                if (!h->filtered && percent >= min_pcnt)
                        return nd;
 
-               nd = rb_prev(nd);
+               nd = rb_hierarchy_prev(nd);
        }
 
        return NULL;
@@ -1271,8 +1762,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser,
                nd = browser->top;
                goto do_offset;
        case SEEK_END:
-               nd = hists__filter_prev_entries(rb_last(browser->entries),
-                                               hb->min_pcnt);
+               nd = rb_hierarchy_last(rb_last(browser->entries));
+               nd = hists__filter_prev_entries(nd, hb->min_pcnt);
                first = false;
                break;
        default:
@@ -1306,7 +1797,7 @@ do_offset:
        if (offset > 0) {
                do {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->unfolded) {
+                       if (h->unfolded && h->leaf) {
                                u16 remaining = h->nr_rows - h->row_offset;
                                if (offset > remaining) {
                                        offset -= remaining;
@@ -1318,7 +1809,8 @@ do_offset:
                                        break;
                                }
                        }
-                       nd = hists__filter_entries(rb_next(nd), hb->min_pcnt);
+                       nd = hists__filter_entries(rb_hierarchy_next(nd),
+                                                  hb->min_pcnt);
                        if (nd == NULL)
                                break;
                        --offset;
@@ -1327,7 +1819,7 @@ do_offset:
        } else if (offset < 0) {
                while (1) {
                        h = rb_entry(nd, struct hist_entry, rb_node);
-                       if (h->unfolded) {
+                       if (h->unfolded && h->leaf) {
                                if (first) {
                                        if (-offset > h->row_offset) {
                                                offset += h->row_offset;
@@ -1351,7 +1843,7 @@ do_offset:
                                }
                        }
 
-                       nd = hists__filter_prev_entries(rb_prev(nd),
+                       nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
                                                        hb->min_pcnt);
                        if (nd == NULL)
                                break;
@@ -1364,7 +1856,7 @@ do_offset:
                                 * row_offset at its last entry.
                                 */
                                h = rb_entry(nd, struct hist_entry, rb_node);
-                               if (h->unfolded)
+                               if (h->unfolded && h->leaf)
                                        h->row_offset = h->nr_rows;
                                break;
                        }
@@ -1378,17 +1870,14 @@ do_offset:
 }
 
 static int hist_browser__fprintf_callchain(struct hist_browser *browser,
-                                          struct hist_entry *he, FILE *fp)
+                                          struct hist_entry *he, FILE *fp,
+                                          int level)
 {
-       u64 total = hists__total_period(he->hists);
        struct callchain_print_arg arg  = {
                .fp = fp,
        };
 
-       if (symbol_conf.cumulate_callchain)
-               total = he->stat_acc->period;
-
-       hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total,
+       hist_browser__show_callchain(browser, he, level, 0,
                                     hist_browser__fprintf_callchain_entry, &arg,
                                     hist_browser__check_dump_full);
        return arg.printed;
@@ -1414,7 +1903,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
        if (symbol_conf.use_callchain)
                printed += fprintf(fp, "%c ", folded_sign);
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                if (perf_hpp__should_skip(fmt, he->hists))
                        continue;
 
@@ -1425,12 +1914,71 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser,
                        first = false;
 
                ret = fmt->entry(fmt, &hpp, he);
+               ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
                advance_hpp(&hpp, ret);
        }
-       printed += fprintf(fp, "%s\n", rtrim(s));
+       printed += fprintf(fp, "%s\n", s);
 
        if (folded_sign == '-')
-               printed += hist_browser__fprintf_callchain(browser, he, fp);
+               printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+       return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+                                                struct hist_entry *he,
+                                                FILE *fp, int level)
+{
+       char s[8192];
+       int printed = 0;
+       char folded_sign = ' ';
+       struct perf_hpp hpp = {
+               .buf = s,
+               .size = sizeof(s),
+       };
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       bool first = true;
+       int ret;
+       int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+       printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+       folded_sign = hist_entry__folded(he);
+       printed += fprintf(fp, "%c", folded_sign);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&he->hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (!first) {
+                       ret = scnprintf(hpp.buf, hpp.size, "  ");
+                       advance_hpp(&hpp, ret);
+               } else
+                       first = false;
+
+               ret = fmt->entry(fmt, &hpp, he);
+               advance_hpp(&hpp, ret);
+       }
+
+       ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+       advance_hpp(&hpp, ret);
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               ret = scnprintf(hpp.buf, hpp.size, "  ");
+               advance_hpp(&hpp, ret);
+
+               ret = fmt->entry(fmt, &hpp, he);
+               advance_hpp(&hpp, ret);
+       }
+
+       printed += fprintf(fp, "%s\n", rtrim(s));
+
+       if (he->leaf && folded_sign == '-') {
+               printed += hist_browser__fprintf_callchain(browser, he, fp,
+                                                          he->depth + 1);
+       }
 
        return printed;
 }
@@ -1444,8 +1992,16 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
        while (nd) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-               printed += hist_browser__fprintf_entry(browser, h, fp);
-               nd = hists__filter_entries(rb_next(nd), browser->min_pcnt);
+               if (symbol_conf.report_hierarchy) {
+                       printed += hist_browser__fprintf_hierarchy_entry(browser,
+                                                                        h, fp,
+                                                                        h->depth);
+               } else {
+                       printed += hist_browser__fprintf_entry(browser, h, fp);
+               }
+
+               nd = hists__filter_entries(rb_hierarchy_next(nd),
+                                          browser->min_pcnt);
        }
 
        return printed;
@@ -1580,11 +2136,18 @@ static int hists__browser_title(struct hists *hists,
        if (hists->uid_filter_str)
                printed += snprintf(bf + printed, size - printed,
                                    ", UID: %s", hists->uid_filter_str);
-       if (thread)
-               printed += scnprintf(bf + printed, size - printed,
+       if (thread) {
+               if (sort__has_thread) {
+                       printed += scnprintf(bf + printed, size - printed,
                                    ", Thread: %s(%d)",
                                     (thread->comm_set ? thread__comm_str(thread) : ""),
                                    thread->tid);
+               } else {
+                       printed += scnprintf(bf + printed, size - printed,
+                                   ", Thread: %s",
+                                    (thread->comm_set ? thread__comm_str(thread) : ""));
+               }
+       }
        if (dso)
                printed += scnprintf(bf + printed, size - printed,
                                    ", DSO: %s", dso->short_name);
@@ -1759,15 +2322,24 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
 {
        struct thread *thread = act->thread;
 
+       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+               return 0;
+
        if (browser->hists->thread_filter) {
                pstack__remove(browser->pstack, &browser->hists->thread_filter);
                perf_hpp__set_elide(HISTC_THREAD, false);
                thread__zput(browser->hists->thread_filter);
                ui_helpline__pop();
        } else {
-               ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
-                                  thread->comm_set ? thread__comm_str(thread) : "",
-                                  thread->tid);
+               if (sort__has_thread) {
+                       ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+                                          thread->comm_set ? thread__comm_str(thread) : "",
+                                          thread->tid);
+               } else {
+                       ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+                                          thread->comm_set ? thread__comm_str(thread) : "");
+               }
+
                browser->hists->thread_filter = thread__get(thread);
                perf_hpp__set_elide(HISTC_THREAD, false);
                pstack__push(browser->pstack, &browser->hists->thread_filter);
@@ -1782,13 +2354,22 @@ static int
 add_thread_opt(struct hist_browser *browser, struct popup_action *act,
               char **optstr, struct thread *thread)
 {
-       if (thread == NULL)
+       int ret;
+
+       if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
                return 0;
 
-       if (asprintf(optstr, "Zoom %s %s(%d) thread",
-                    browser->hists->thread_filter ? "out of" : "into",
-                    thread->comm_set ? thread__comm_str(thread) : "",
-                    thread->tid) < 0)
+       if (sort__has_thread) {
+               ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+                              browser->hists->thread_filter ? "out of" : "into",
+                              thread->comm_set ? thread__comm_str(thread) : "",
+                              thread->tid);
+       } else {
+               ret = asprintf(optstr, "Zoom %s %s thread",
+                              browser->hists->thread_filter ? "out of" : "into",
+                              thread->comm_set ? thread__comm_str(thread) : "");
+       }
+       if (ret < 0)
                return 0;
 
        act->thread = thread;
@@ -1801,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
 {
        struct map *map = act->ms.map;
 
+       if (!sort__has_dso || map == NULL)
+               return 0;
+
        if (browser->hists->dso_filter) {
                pstack__remove(browser->pstack, &browser->hists->dso_filter);
                perf_hpp__set_elide(HISTC_DSO, false);
@@ -1825,7 +2409,7 @@ static int
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
            char **optstr, struct map *map)
 {
-       if (map == NULL)
+       if (!sort__has_dso || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Zoom %s %s DSO",
@@ -1850,7 +2434,7 @@ static int
 add_map_opt(struct hist_browser *browser __maybe_unused,
            struct popup_action *act, char **optstr, struct map *map)
 {
-       if (map == NULL)
+       if (!sort__has_dso || map == NULL)
                return 0;
 
        if (asprintf(optstr, "Browse map details") < 0)
@@ -1952,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused,
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
+       if (!sort__has_socket || act->socket < 0)
+               return 0;
+
        if (browser->hists->socket_filter > -1) {
                pstack__remove(browser->pstack, &browser->hists->socket_filter);
                browser->hists->socket_filter = -1;
@@ -1971,7 +2558,7 @@ static int
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
               char **optstr, int socket_id)
 {
-       if (socket_id < 0)
+       if (!sort__has_socket || socket_id < 0)
                return 0;
 
        if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -1989,17 +2576,60 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb)
        u64 nr_entries = 0;
        struct rb_node *nd = rb_first(&hb->hists->entries);
 
-       if (hb->min_pcnt == 0) {
+       if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
                hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
                return;
        }
 
        while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
                nr_entries++;
-               nd = rb_next(nd);
+               nd = rb_hierarchy_next(nd);
        }
 
        hb->nr_non_filtered_entries = nr_entries;
+       hb->nr_hierarchy_entries = nr_entries;
+}
+
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+                                              double percent)
+{
+       struct hist_entry *he;
+       struct rb_node *nd = rb_first(&hb->hists->entries);
+       u64 total = hists__total_period(hb->hists);
+       u64 min_callchain_hits = total * (percent / 100);
+
+       hb->min_pcnt = callchain_param.min_percent = percent;
+
+       while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+               he = rb_entry(nd, struct hist_entry, rb_node);
+
+               if (he->has_no_entry) {
+                       he->has_no_entry = false;
+                       he->nr_rows = 0;
+               }
+
+               if (!he->leaf || !symbol_conf.use_callchain)
+                       goto next;
+
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
+
+                       min_callchain_hits = total * (percent / 100);
+               }
+
+               callchain_param.sort(&he->sorted_chain, he->callchain,
+                                    min_callchain_hits, &callchain_param);
+
+next:
+               nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+               /* force to re-evaluate folding state of callchains */
+               he->init_have_children = false;
+               hist_entry__set_folding(he, hb, false);
+       }
 }
 
 static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
@@ -2037,6 +2667,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        "E             Expand all callchains\n"                         \
        "F             Toggle percentage of filtered entries\n"         \
        "H             Display column headers\n"                        \
+       "L             Change percent limit\n"                          \
        "m             Display context menu\n"                          \
        "S             Zoom into current Processor Socket\n"            \
 
@@ -2077,7 +2708,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
        memset(options, 0, sizeof(options));
        memset(actions, 0, sizeof(actions));
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(browser->hists, fmt) {
                perf_hpp__reset_width(fmt, hists);
                /*
                 * This is done just once, and activates the horizontal scrolling
@@ -2192,6 +2823,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                                top->zero = !top->zero;
                        }
                        continue;
+               case 'L':
+                       if (ui_browser__input_window("Percent Limit",
+                                       "Please enter the percent limit; entries below it will be hidden.",
+                                       buf, "ENTER: OK, ESC: Cancel",
+                                       delay_secs * 2) == K_ENTER) {
+                               char *end;
+                               double new_percent = strtod(buf, &end);
+
+                               if (new_percent < 0 || new_percent > 100) {
+                                       ui_browser__warning(&browser->b, delay_secs * 2,
+                                               "Invalid percent: %.2f", new_percent);
+                                       continue;
+                               }
+
+                               hist_browser__update_percent_limit(browser, new_percent);
+                               hist_browser__reset(browser);
+                       }
+                       continue;
                case K_F1:
                case 'h':
                case '?':
@@ -2263,10 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
                        continue;
                }
 
-               if (!sort__has_sym)
-                       goto add_exit_option;
-
-               if (browser->selection == NULL)
+               if (!sort__has_sym || browser->selection == NULL)
                        goto skip_annotation;
 
                if (sort__mode == SORT_MODE__BRANCH) {
@@ -2306,11 +2952,16 @@ skip_annotation:
                                             &options[nr_options],
                                             socked_id);
                /* perf script support */
+               if (!is_report_browser(hbt))
+                       goto skip_scripting;
+
                if (browser->he_selection) {
-                       nr_options += add_script_opt(browser,
-                                                    &actions[nr_options],
-                                                    &options[nr_options],
-                                                    thread, NULL);
+                       if (sort__has_thread && thread) {
+                               nr_options += add_script_opt(browser,
+                                                            &actions[nr_options],
+                                                            &options[nr_options],
+                                                            thread, NULL);
+                       }
                        /*
                         * Note that browser->selection != NULL
                         * when browser->he_selection is not NULL,
@@ -2320,16 +2971,18 @@ skip_annotation:
                         *
                         * See hist_browser__show_entry.
                         */
-                       nr_options += add_script_opt(browser,
-                                                    &actions[nr_options],
-                                                    &options[nr_options],
-                                                    NULL, browser->selection->sym);
+                       if (sort__has_sym && browser->selection->sym) {
+                               nr_options += add_script_opt(browser,
+                                                            &actions[nr_options],
+                                                            &options[nr_options],
+                                                            NULL, browser->selection->sym);
+                       }
                }
                nr_options += add_script_opt(browser, &actions[nr_options],
                                             &options[nr_options], NULL, NULL);
                nr_options += add_switch_opt(browser, &actions[nr_options],
                                             &options[nr_options]);
-add_exit_option:
+skip_scripting:
                nr_options += add_exit_opt(browser, &actions[nr_options],
                                           &options[nr_options]);
 
index 0f8dcfdfb10f3856abefc7f252631e8937bacf6b..bd9bf7e343b1e310d20587d0b0f7b75c6b10a574 100644 (file)
@@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
        nr_cols = 0;
 
-       perf_hpp__for_each_format(fmt)
+       hists__for_each_format(hists, fmt)
                col_types[nr_cols++] = G_TYPE_STRING;
 
        store = gtk_tree_store_newv(nr_cols, col_types);
@@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
        col_idx = 0;
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 
                col_idx = 0;
 
-               perf_hpp__for_each_format(fmt) {
+               hists__for_each_format(hists, fmt) {
                        if (perf_hpp__should_skip(fmt, h->hists))
                                continue;
 
@@ -396,6 +396,194 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
        gtk_container_add(GTK_CONTAINER(window), view);
 }
 
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+                                           struct rb_root *root,
+                                           GtkTreeStore *store,
+                                           GtkTreeIter *parent,
+                                           struct perf_hpp *hpp,
+                                           float min_pcnt)
+{
+       int col_idx = 0;
+       struct rb_node *node;
+       struct hist_entry *he;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       u64 total = hists__total_period(hists);
+       int size;
+
+       for (node = rb_first(root); node; node = rb_next(node)) {
+               GtkTreeIter iter;
+               float percent;
+               char *bf;
+
+               he = rb_entry(node, struct hist_entry, rb_node);
+               if (he->filtered)
+                       continue;
+
+               percent = hist_entry__get_percent_limit(he);
+               if (percent < min_pcnt)
+                       continue;
+
+               gtk_tree_store_append(store, &iter, parent);
+
+               col_idx = 0;
+
+               /* the first hpp_list_node is for overhead columns */
+               fmt_node = list_first_entry(&hists->hpp_formats,
+                                           struct perf_hpp_list_node, list);
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (fmt->color)
+                               fmt->color(fmt, hpp, he);
+                       else
+                               fmt->entry(fmt, hpp, he);
+
+                       gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+               }
+
+               bf = hpp->buf;
+               size = hpp->size;
+               perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+                       int ret;
+
+                       if (fmt->color)
+                               ret = fmt->color(fmt, hpp, he);
+                       else
+                               ret = fmt->entry(fmt, hpp, he);
+
+                       snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+                       advance_hpp(hpp, ret + 2);
+               }
+
+               gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+               if (!he->leaf) {
+                       hpp->buf = bf;
+                       hpp->size = size;
+
+                       perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+                                                       store, &iter, hpp,
+                                                       min_pcnt);
+
+                       if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+                               char buf[32];
+                               GtkTreeIter child;
+
+                               snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+                                        min_pcnt);
+
+                               gtk_tree_store_append(store, &child, &iter);
+                               gtk_tree_store_set(store, &child, col_idx, buf, -1);
+                       }
+               }
+
+               if (symbol_conf.use_callchain && he->leaf) {
+                       if (callchain_param.mode == CHAIN_GRAPH_REL)
+                               total = symbol_conf.cumulate_callchain ?
+                                       he->stat_acc->period : he->stat.period;
+
+                       perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+                                               col_idx, total);
+               }
+       }
+
+}
+
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+                                    float min_pcnt)
+{
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+       GType col_types[MAX_COLUMNS];
+       GtkCellRenderer *renderer;
+       GtkTreeStore *store;
+       GtkWidget *view;
+       int col_idx;
+       int nr_cols = 0;
+       char s[512];
+       char buf[512];
+       bool first_node, first_col;
+       struct perf_hpp hpp = {
+               .buf            = s,
+               .size           = sizeof(s),
+       };
+
+       hists__for_each_format(hists, fmt) {
+               if (perf_hpp__is_sort_entry(fmt) ||
+                   perf_hpp__is_dynamic_entry(fmt))
+                       break;
+
+               col_types[nr_cols++] = G_TYPE_STRING;
+       }
+       col_types[nr_cols++] = G_TYPE_STRING;
+
+       store = gtk_tree_store_newv(nr_cols, col_types);
+       view = gtk_tree_view_new();
+       renderer = gtk_cell_renderer_text_new();
+
+       col_idx = 0;
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+                                                           -1, fmt->name,
+                                                           renderer, "markup",
+                                                           col_idx++, NULL);
+       }
+
+       /* construct a merged column header since sort keys share a single column */
+       buf[0] = '\0';
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node)
+                       strcat(buf, " / ");
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               strcat(buf, "+");
+                       first_col = false;
+
+                       fmt->header(fmt, &hpp, hists_to_evsel(hists));
+                       strcat(buf, ltrim(rtrim(hpp.buf)));
+               }
+       }
+
+       gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+                                                   -1, buf,
+                                                   renderer, "markup",
+                                                   col_idx++, NULL);
+
+       for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+               GtkTreeViewColumn *column;
+
+               column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+               gtk_tree_view_column_set_resizable(column, TRUE);
+
+               if (col_idx == 0) {
+                       gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+                                                         column);
+               }
+       }
+
+       gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+       g_object_unref(GTK_TREE_MODEL(store));
+
+       perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+                                       NULL, &hpp, min_pcnt);
+
+       gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+       g_signal_connect(view, "row-activated",
+                        G_CALLBACK(on_row_activated), NULL);
+       gtk_container_add(GTK_CONTAINER(window), view);
+}
+
 int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
                                  const char *help,
                                  struct hist_browser_timer *hbt __maybe_unused,
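
perf_gtk__add_hierarchy_entries() above walks one hierarchy level, appends a row under the parent GtkTreeIter, and recurses into he->hroot_out with the new row as the parent. The sketch below (plain C, no GTK; the node struct and sample data are invented) shows just that recursion shape: one level per call, the current entry becoming the parent of its children, with the same percent-limit filtering.

#include <stdio.h>

struct node {
        const char *name;
        double percent;
        struct node *children;  /* first child */
        struct node *next;      /* next sibling */
};

static void add_entries(const struct node *n, int level, double min_pcnt)
{
        for (; n != NULL; n = n->next) {
                if (n->percent < min_pcnt)      /* percent limit, as in the hierarchy browser */
                        continue;

                printf("%*s%5.2f%%  %s\n", level * 2, "", n->percent, n->name);

                /* recurse with this entry as the parent of its children */
                if (n->children)
                        add_entries(n->children, level + 1, min_pcnt);
        }
}

int main(void)
{
        struct node leaf1 = { "do_syscall", 7.5, NULL,   NULL };
        struct node leaf2 = { "page_fault", 0.3, NULL,   NULL };
        struct node comm  = { "bash",      12.0, &leaf1, NULL };

        leaf1.next = &leaf2;
        add_entries(&comm, 0, 1.0 /* hide entries below 1% */);
        return 0;
}
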
@@ -463,7 +651,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
                                                        GTK_POLICY_AUTOMATIC,
                                                        GTK_POLICY_AUTOMATIC);
 
-               perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+               if (symbol_conf.report_hierarchy)
+                       perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+               else
+                       perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
 
                tab_label = gtk_label_new(evname);
 
index bf2a66e254eac35c63a0dd44e83628f34e4dc2f9..3baeaa6e71b5a51e113b8b485df97b7c8ae003a2 100644 (file)
@@ -5,6 +5,7 @@
 #include "../util/util.h"
 #include "../util/sort.h"
 #include "../util/evsel.h"
+#include "../util/evlist.h"
 
 /* hist period print (hpp) functions */
 
@@ -371,7 +372,20 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
        return 0;
 }
 
-#define HPP__COLOR_PRINT_FNS(_name, _fn)               \
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+       return a->header == hpp__header_fn;
+}
+
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b))
+               return false;
+
+       return a->idx == b->idx;
+}
+
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx)         \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -381,9 +395,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
-#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn)           \
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx)     \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -393,9 +409,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
-#define HPP__PRINT_FNS(_name, _fn)                     \
+#define HPP__PRINT_FNS(_name, _fn, _idx)               \
        {                                               \
                .name   = _name,                        \
                .header = hpp__header_fn,               \
@@ -404,22 +422,25 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                .cmp    = hpp__nop_cmp,                 \
                .collapse = hpp__nop_cmp,               \
                .sort   = hpp__sort_ ## _fn,            \
+               .idx    = PERF_HPP__ ## _idx,           \
+               .equal  = hpp__equal,                   \
        }
 
 struct perf_hpp_fmt perf_hpp__format[] = {
-       HPP__COLOR_PRINT_FNS("Overhead", overhead),
-       HPP__COLOR_PRINT_FNS("sys", overhead_sys),
-       HPP__COLOR_PRINT_FNS("usr", overhead_us),
-       HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys),
-       HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us),
-       HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc),
-       HPP__PRINT_FNS("Samples", samples),
-       HPP__PRINT_FNS("Period", period)
+       HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+       HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+       HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+       HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+       HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+       HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+       HPP__PRINT_FNS("Samples", samples, SAMPLES),
+       HPP__PRINT_FNS("Period", period, PERIOD)
 };
 
-LIST_HEAD(perf_hpp__list);
-LIST_HEAD(perf_hpp__sort_list);
-
+struct perf_hpp_list perf_hpp_list = {
+       .fields = LIST_HEAD_INIT(perf_hpp_list.fields),
+       .sorts  = LIST_HEAD_INIT(perf_hpp_list.sorts),
+};
 
 #undef HPP__COLOR_PRINT_FNS
 #undef HPP__COLOR_ACC_PRINT_FNS
@@ -485,63 +506,60 @@ void perf_hpp__init(void)
                hpp_dimension__add_output(PERF_HPP__PERIOD);
 }
 
-void perf_hpp__column_register(struct perf_hpp_fmt *format)
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+                                   struct perf_hpp_fmt *format)
 {
-       list_add_tail(&format->list, &perf_hpp__list);
+       list_add_tail(&format->list, &list->fields);
 }
 
-void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+                                       struct perf_hpp_fmt *format)
 {
-       list_del(&format->list);
+       list_add_tail(&format->sort_list, &list->sorts);
 }
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
-{
-       list_add_tail(&format->sort_list, &perf_hpp__sort_list);
-}
-
-void perf_hpp__column_enable(unsigned col)
-{
-       BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       perf_hpp__column_register(&perf_hpp__format[col]);
-}
-
-void perf_hpp__column_disable(unsigned col)
+void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
 {
-       BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       perf_hpp__column_unregister(&perf_hpp__format[col]);
+       list_del(&format->list);
 }
 
 void perf_hpp__cancel_cumulate(void)
 {
+       struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp;
+
        if (is_strict_order(field_order))
                return;
 
-       perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC);
-       perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead";
+       ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+       acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+
+       perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+               if (acc->equal(acc, fmt)) {
+                       perf_hpp__column_unregister(fmt);
+                       continue;
+               }
+
+               if (ovh->equal(ovh, fmt))
+                       fmt->name = "Overhead";
+       }
 }
 
-void perf_hpp__setup_output_field(void)
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       return a->equal && a->equal(a, b);
+}
+
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt;
 
        /* append sort keys to output field */
-       perf_hpp__for_each_sort_list(fmt) {
-               if (!list_empty(&fmt->list))
-                       continue;
-
-               /*
-                * sort entry fields are dynamically created,
-                * so they can share a same sort key even though
-                * the list is empty.
-                */
-               if (perf_hpp__is_sort_entry(fmt)) {
-                       struct perf_hpp_fmt *pos;
+       perf_hpp_list__for_each_sort_list(list, fmt) {
+               struct perf_hpp_fmt *pos;
 
-                       perf_hpp__for_each_format(pos) {
-                               if (perf_hpp__same_sort_entry(pos, fmt))
-                                       goto next;
-                       }
+               perf_hpp_list__for_each_format(list, pos) {
+                       if (fmt_equal(fmt, pos))
+                               goto next;
                }
 
                perf_hpp__column_register(fmt);
@@ -550,27 +568,17 @@ next:
        }
 }
 
-void perf_hpp__append_sort_keys(void)
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt;
 
        /* append output fields to sort keys */
-       perf_hpp__for_each_format(fmt) {
-               if (!list_empty(&fmt->sort_list))
-                       continue;
-
-               /*
-                * sort entry fields are dynamically created,
-                * so they can share a same sort key even though
-                * the list is empty.
-                */
-               if (perf_hpp__is_sort_entry(fmt)) {
-                       struct perf_hpp_fmt *pos;
+       perf_hpp_list__for_each_format(list, fmt) {
+               struct perf_hpp_fmt *pos;
 
-                       perf_hpp__for_each_sort_list(pos) {
-                               if (perf_hpp__same_sort_entry(pos, fmt))
-                                       goto next;
-                       }
+               perf_hpp_list__for_each_sort_list(list, pos) {
+                       if (fmt_equal(fmt, pos))
+                               goto next;
                }
 
                perf_hpp__register_sort_field(fmt);
@@ -579,20 +587,29 @@ next:
        }
 }
 
-void perf_hpp__reset_output_field(void)
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+       if (fmt->free)
+               fmt->free(fmt);
+}
+
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
 {
        struct perf_hpp_fmt *fmt, *tmp;
 
        /* reset output fields */
-       perf_hpp__for_each_format_safe(fmt, tmp) {
+       perf_hpp_list__for_each_format_safe(list, fmt, tmp) {
                list_del_init(&fmt->list);
                list_del_init(&fmt->sort_list);
+               fmt_free(fmt);
        }
 
        /* reset sort keys */
-       perf_hpp__for_each_sort_list_safe(fmt, tmp) {
+       perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) {
                list_del_init(&fmt->list);
                list_del_init(&fmt->sort_list);
+               fmt_free(fmt);
        }
 }
 
@@ -606,7 +623,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
        bool first = true;
        struct perf_hpp dummy_hpp;
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -624,22 +641,39 @@ unsigned int hists__sort_list_width(struct hists *hists)
        return ret;
 }
 
-void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+unsigned int hists__overhead_width(struct hists *hists)
 {
-       int idx;
-
-       if (perf_hpp__is_sort_entry(fmt))
-               return perf_hpp__reset_sort_width(fmt, hists);
+       struct perf_hpp_fmt *fmt;
+       int ret = 0;
+       bool first = true;
+       struct perf_hpp dummy_hpp;
 
-       for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) {
-               if (fmt == &perf_hpp__format[idx])
+       hists__for_each_format(hists, fmt) {
+               if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
                        break;
+
+               if (first)
+                       first = false;
+               else
+                       ret += 2;
+
+               ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
        }
 
-       if (idx == PERF_HPP__MAX_INDEX)
+       return ret;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+       if (perf_hpp__is_sort_entry(fmt))
+               return perf_hpp__reset_sort_width(fmt, hists);
+
+       if (perf_hpp__is_dynamic_entry(fmt))
                return;
 
-       switch (idx) {
+       BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+       switch (fmt->idx) {
        case PERF_HPP__OVERHEAD:
        case PERF_HPP__OVERHEAD_SYS:
        case PERF_HPP__OVERHEAD_US:
@@ -667,7 +701,7 @@ void perf_hpp__set_user_width(const char *width_list_str)
        struct perf_hpp_fmt *fmt;
        const char *ptr = width_list_str;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                char *p;
 
                int len = strtol(ptr, &p, 10);
@@ -679,3 +713,71 @@ void perf_hpp__set_user_width(const char *width_list_str)
                        break;
        }
 }
+
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+       struct perf_hpp_list_node *node = NULL;
+       struct perf_hpp_fmt *fmt_copy;
+       bool found = false;
+       bool skip = perf_hpp__should_skip(fmt, hists);
+
+       list_for_each_entry(node, &hists->hpp_formats, list) {
+               if (node->level == fmt->level) {
+                       found = true;
+                       break;
+               }
+       }
+
+       if (!found) {
+               node = malloc(sizeof(*node));
+               if (node == NULL)
+                       return -1;
+
+               node->skip = skip;
+               node->level = fmt->level;
+               perf_hpp_list__init(&node->hpp);
+
+               hists->nr_hpp_node++;
+               list_add_tail(&node->list, &hists->hpp_formats);
+       }
+
+       fmt_copy = perf_hpp_fmt__dup(fmt);
+       if (fmt_copy == NULL)
+               return -1;
+
+       if (!skip)
+               node->skip = false;
+
+       list_add_tail(&fmt_copy->list, &node->hpp.fields);
+       list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+       return 0;
+}
+
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+                                 struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+       struct perf_hpp_fmt *fmt;
+       struct hists *hists;
+       int ret;
+
+       if (!symbol_conf.report_hierarchy)
+               return 0;
+
+       evlist__for_each(evlist, evsel) {
+               hists = evsel__hists(evsel);
+
+               perf_hpp_list__for_each_sort_list(list, fmt) {
+                       if (perf_hpp__is_dynamic_entry(fmt) &&
+                           !perf_hpp__defined_dynamic_entry(fmt, hists))
+                               continue;
+
+                       ret = add_hierarchy_fmt(hists, fmt);
+                       if (ret < 0)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
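
The reworked perf_hpp__setup_output_field() and perf_hpp__append_sort_keys() above no longer special-case sort entries; they simply skip a format when an ->equal() callback finds an equivalent one already registered. A minimal sketch of that de-duplicating append, with invented names (struct fmt, append_unique) and equality reduced to an index compare as in hpp__equal():

#include <stdio.h>

struct fmt {
        const char *name;
        int idx;                        /* what ->equal() compares here */
        struct fmt *next;
};

static int fmt_equal(const struct fmt *a, const struct fmt *b)
{
        return a->idx == b->idx;
}

static void append_unique(struct fmt **fields, struct fmt *key)
{
        struct fmt **pos = fields;

        for (; *pos; pos = &(*pos)->next) {
                if (fmt_equal(*pos, key))
                        return;         /* an equal format is already registered: skip */
        }
        key->next = NULL;
        *pos = key;                     /* append at the tail */
}

int main(void)
{
        struct fmt overhead = { "Overhead", 0,  NULL };
        struct fmt comm     = { "Command",  10, NULL };
        struct fmt comm_dup = { "Command",  10, NULL };
        struct fmt *fields  = NULL;

        append_unique(&fields, &overhead);
        append_unique(&fields, &comm);
        append_unique(&fields, &comm_dup);      /* dropped: equal to "Command" */

        for (struct fmt *f = fields; f; f = f->next)
                printf("%s\n", f->name);
        return 0;
}
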
index 387110d50b002557d99e723605407c3db990d71a..7aff5acf3265782e03254de2d8bc1dfafb56e03a 100644 (file)
@@ -165,8 +165,28 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
        return ret;
 }
 
+/*
+ * If there is a single callchain root, don't bother printing
+ * its percentage (100% in fractal mode, and the same percentage
+ * as the hist in graph mode). This also avoids one level of column.
+ *
+ * However, when a percent limit is applied, a single callchain
+ * node can have a different (non-100% in fractal mode) percentage.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+       struct callchain_node *cnode;
+
+       if (rb_next(node))
+               return true;
+
+       cnode = rb_entry(node, struct callchain_node, rb_node);
+       return callchain_cumul_hits(cnode) != parent_samples;
+}
+
 static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
-                                      u64 total_samples, int left_margin)
+                                      u64 total_samples, u64 parent_samples,
+                                      int left_margin)
 {
        struct callchain_node *cnode;
        struct callchain_list *chain;
@@ -177,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
        int ret = 0;
        char bf[1024];
 
-       /*
-        * If have one single callchain root, don't bother printing
-        * its percentage (100 % in fractal mode and the same percentage
-        * than the hist in graph mode). This also avoid one level of column.
-        */
        node = rb_first(root);
-       if (node && !rb_next(node)) {
+       if (node && !need_percent_display(node, parent_samples)) {
                cnode = rb_entry(node, struct callchain_node, rb_node);
                list_for_each_entry(chain, &cnode->val, list) {
                        /*
@@ -213,9 +228,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
                root = &cnode->rb_root;
        }
 
+       if (callchain_param.mode == CHAIN_GRAPH_REL)
+               total_samples = parent_samples;
+
        ret += __callchain__fprintf_graph(fp, root, total_samples,
                                          1, 1, left_margin);
-       ret += fprintf(fp, "\n");
+       if (ret) {
+               /* do not add a blank line if it printed nothing */
+               ret += fprintf(fp, "\n");
+       }
 
        return ret;
 }
@@ -323,16 +344,19 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
                                            u64 total_samples, int left_margin,
                                            FILE *fp)
 {
+       u64 parent_samples = he->stat.period;
+
+       if (symbol_conf.cumulate_callchain)
+               parent_samples = he->stat_acc->period;
+
        switch (callchain_param.mode) {
        case CHAIN_GRAPH_REL:
-               return callchain__fprintf_graph(fp, &he->sorted_chain,
-                                               symbol_conf.cumulate_callchain ?
-                                               he->stat_acc->period : he->stat.period,
-                                               left_margin);
+               return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+                                               parent_samples, left_margin);
                break;
        case CHAIN_GRAPH_ABS:
                return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
-                                               left_margin);
+                                               parent_samples, left_margin);
                break;
        case CHAIN_FLAT:
                return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
@@ -349,45 +373,66 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
        return 0;
 }
 
-static size_t hist_entry__callchain_fprintf(struct hist_entry *he,
-                                           struct hists *hists,
-                                           FILE *fp)
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
 {
-       int left_margin = 0;
-       u64 total_period = hists->stats.total_period;
+       const char *sep = symbol_conf.field_sep;
+       struct perf_hpp_fmt *fmt;
+       char *start = hpp->buf;
+       int ret;
+       bool first = true;
 
-       if (field_order == NULL && (sort_order == NULL ||
-                                   !prefixcmp(sort_order, "comm"))) {
-               struct perf_hpp_fmt *fmt;
+       if (symbol_conf.exclude_other && !he->parent)
+               return 0;
 
-               perf_hpp__for_each_format(fmt) {
-                       if (!perf_hpp__is_sort_entry(fmt))
-                               continue;
+       hists__for_each_format(he->hists, fmt) {
+               if (perf_hpp__should_skip(fmt, he->hists))
+                       continue;
 
-                       /* must be 'comm' sort entry */
-                       left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists));
-                       left_margin -= thread__comm_len(he->thread);
-                       break;
-               }
+               /*
+                * If there's no field_sep, we still need
+                * to display initial '  '.
+                */
+               if (!sep || !first) {
+                       ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: "  ");
+                       advance_hpp(hpp, ret);
+               } else
+                       first = false;
+
+               if (perf_hpp__use_color() && fmt->color)
+                       ret = fmt->color(fmt, hpp, he);
+               else
+                       ret = fmt->entry(fmt, hpp, he);
+
+               ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+               advance_hpp(hpp, ret);
        }
-       return hist_entry_callchain__fprintf(he, total_period, left_margin, fp);
+
+       return hpp->buf - start;
 }
 
-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+                                        struct perf_hpp *hpp,
+                                        struct hists *hists,
+                                        FILE *fp)
 {
        const char *sep = symbol_conf.field_sep;
        struct perf_hpp_fmt *fmt;
-       char *start = hpp->buf;
-       int ret;
+       struct perf_hpp_list_node *fmt_node;
+       char *buf = hpp->buf;
+       size_t size = hpp->size;
+       int ret, printed = 0;
        bool first = true;
 
        if (symbol_conf.exclude_other && !he->parent)
                return 0;
 
-       perf_hpp__for_each_format(fmt) {
-               if (perf_hpp__should_skip(fmt, he->hists))
-                       continue;
+       ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+       advance_hpp(hpp, ret);
 
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
                /*
                 * If there's no field_sep, we still need
                 * to display initial '  '.
@@ -403,10 +448,47 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
                else
                        ret = fmt->entry(fmt, hpp, he);
 
+               ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
                advance_hpp(hpp, ret);
        }
 
-       return hpp->buf - start;
+       if (!sep)
+               ret = scnprintf(hpp->buf, hpp->size, "%*s",
+                               (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+       advance_hpp(hpp, ret);
+
+       printed += fprintf(fp, "%s", buf);
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               hpp->buf  = buf;
+               hpp->size = size;
+
+               /*
+                * No need to call hist_entry__snprintf_alignment() since this
+                * fmt is always the last column in the hierarchy mode.
+                */
+               if (perf_hpp__use_color() && fmt->color)
+                       fmt->color(fmt, hpp, he);
+               else
+                       fmt->entry(fmt, hpp, he);
+
+               /*
+                * dynamic entries are right-aligned but we want them
+                * left-aligned in hierarchy mode
+                */
+               printed += fprintf(fp, "%s%s", sep ?: "  ", ltrim(buf));
+       }
+       printed += putc('\n', fp);
+
+       if (symbol_conf.use_callchain && he->leaf) {
+               u64 total = hists__total_period(hists);
+
+               printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+               goto out;
+       }
+
+out:
+       return printed;
 }
 
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
@@ -418,24 +500,134 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
                .buf            = bf,
                .size           = size,
        };
+       u64 total_period = hists->stats.total_period;
 
        if (size == 0 || size > bfsz)
                size = hpp.size = bfsz;
 
+       if (symbol_conf.report_hierarchy)
+               return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
        hist_entry__snprintf(he, &hpp);
 
        ret = fprintf(fp, "%s\n", bf);
 
        if (symbol_conf.use_callchain)
-               ret += hist_entry__callchain_fprintf(he, hists, fp);
+               ret += hist_entry_callchain__fprintf(he, total_period, 0, fp);
 
        return ret;
 }
 
+static int print_hierarchy_indent(const char *sep, int indent,
+                                 const char *line, FILE *fp)
+{
+       if (sep != NULL || indent < 2)
+               return 0;
+
+       return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+}
+
+static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
+                                 const char *sep, FILE *fp)
+{
+       bool first_node, first_col;
+       int indent;
+       int depth;
+       unsigned width = 0;
+       unsigned header_width = 0;
+       struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
+
+       indent = hists->nr_hpp_node;
+
+       /* preserve max indent depth for column headers */
+       print_hierarchy_indent(sep, indent, spaces, fp);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               fmt->header(fmt, hpp, hists_to_evsel(hists));
+               fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
+       }
+
+       /* combine sort headers with ' / ' */
+       first_node = true;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               if (!first_node)
+                       header_width += fprintf(fp, " / ");
+               first_node = false;
+
+               first_col = true;
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               header_width += fprintf(fp, "+");
+                       first_col = false;
+
+                       fmt->header(fmt, hpp, hists_to_evsel(hists));
+                       rtrim(hpp->buf);
+
+                       header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+               }
+       }
+
+       fprintf(fp, "\n# ");
+
+       /* preserve max indent depth for initial dots */
+       print_hierarchy_indent(sep, indent, dots, fp);
+
+       /* the first hpp_list_node is for overhead columns */
+       fmt_node = list_first_entry(&hists->hpp_formats,
+                                   struct perf_hpp_list_node, list);
+
+       first_col = true;
+       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+               if (!first_col)
+                       fprintf(fp, "%s", sep ?: "..");
+               first_col = false;
+
+               width = fmt->width(fmt, hpp, hists_to_evsel(hists));
+               fprintf(fp, "%.*s", width, dots);
+       }
+
+       depth = 0;
+       list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+               first_col = true;
+               width = depth * HIERARCHY_INDENT;
+
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+                       if (perf_hpp__should_skip(fmt, hists))
+                               continue;
+
+                       if (!first_col)
+                               width++;  /* for '+' sign between column headers */
+                       first_col = false;
+
+                       width += fmt->width(fmt, hpp, hists_to_evsel(hists));
+               }
+
+               if (width > header_width)
+                       header_width = width;
+
+               depth++;
+       }
+
+       fprintf(fp, "%s%-.*s", sep ?: "  ", header_width, dots);
+
+       fprintf(fp, "\n#\n");
+
+       return 2;
+}
+
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
                      int max_cols, float min_pcnt, FILE *fp)
 {
        struct perf_hpp_fmt *fmt;
+       struct perf_hpp_list_node *fmt_node;
        struct rb_node *nd;
        size_t ret = 0;
        unsigned int width;
@@ -449,10 +641,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
        bool first = true;
        size_t linesz;
        char *line = NULL;
+       unsigned indent;
 
        init_rem_hits();
 
-       perf_hpp__for_each_format(fmt)
+       hists__for_each_format(hists, fmt)
                perf_hpp__reset_width(fmt, hists);
 
        if (symbol_conf.col_width_list_str)
@@ -463,7 +656,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 
        fprintf(fp, "# ");
 
-       perf_hpp__for_each_format(fmt) {
+       if (symbol_conf.report_hierarchy) {
+               list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
+                       perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+                               perf_hpp__reset_width(fmt, hists);
+               }
+               nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp);
+               goto print_entries;
+       }
+
+       hists__for_each_format(hists, fmt) {
                if (perf_hpp__should_skip(fmt, hists))
                        continue;
 
@@ -487,7 +689,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 
        fprintf(fp, "# ");
 
-       perf_hpp__for_each_format(fmt) {
+       hists__for_each_format(hists, fmt) {
                unsigned int i;
 
                if (perf_hpp__should_skip(fmt, hists))
@@ -520,7 +722,9 @@ print_entries:
                goto out;
        }
 
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+       indent = hists__overhead_width(hists) + 4;
+
+       for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                float percent;
 
@@ -536,6 +740,20 @@ print_entries:
                if (max_rows && ++nr_rows >= max_rows)
                        break;
 
+               /*
+                * If all children are filtered out or percent-limited,
+                * display "no entry >= x.xx%" message.
+                */
+               if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+                       int depth = hists->nr_hpp_node + h->depth + 1;
+
+                       print_hierarchy_indent(sep, depth, spaces, fp);
+                       fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+                       if (max_rows && ++nr_rows >= max_rows)
+                               break;
+               }
+
                if (h->ms.map == NULL && verbose > 1) {
                        __map_groups__fprintf_maps(h->thread->mg,
                                                   MAP__FUNCTION, fp);
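
print_hierarchy_indent() above pads with a long filler string ("spaces" or "dots") and relies on the printf precision in "%-.*s" to emit exactly (indent - 2) * HIERARCHY_INDENT characters of it, without building a temporary buffer. A tiny standalone sketch of that trick; the filler string and the local HIERARCHY_INDENT value are stand-ins, not perf's definitions.

#include <stdio.h>

#define HIERARCHY_INDENT 2      /* local stand-in for the constant used by the patch */

static const char dots[] = "....................";

static void print_indent(int nr_levels)
{
        /* the precision limits output to nr_levels * HIERARCHY_INDENT filler chars */
        printf("%-.*s|\n", nr_levels * HIERARCHY_INDENT, dots);
}

int main(void)
{
        for (int level = 0; level < 4; level++)
                print_indent(level);
        return 0;
}
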
index 5eec53a3f4ac7dbedb54776f746705acfa4fa2b4..eea25e2424e99172715c681a05ffc68b3c58e78a 100644 (file)
@@ -82,6 +82,7 @@ libperf-y += parse-branch-options.o
 libperf-y += parse-regs-options.o
 libperf-y += term.o
 libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -105,8 +106,17 @@ libperf-y += scripting-engines/
 
 libperf-$(CONFIG_ZLIB) += zlib.o
 libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_LIBELF) += genelf_debug.o
+endif
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o  += -Wno-packed
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
        $(call rule_mkdir)
index 360fda01f3b0d17369254d03fe16daf24f30c698..ec164fe70718df1480b02733d8701c7ab2b74297 100644 (file)
@@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap)
                         heap_array[last].ordinal);
 }
 
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+                                      struct perf_evlist *evlist)
 {
        if (itr)
-               return itr->info_priv_size(itr);
+               return itr->info_priv_size(itr, evlist);
        return 0;
 }
 
@@ -852,7 +853,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
        int err;
 
        pr_debug2("Synthesizing auxtrace information\n");
-       priv_size = auxtrace_record__info_priv_size(itr);
+       priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
        ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
        if (!ev)
                return -ENOMEM;
index b86f90db1352a6c8635e3ea5d02aa3c21bccc323..e5a8e2d4f2af4e9b717be7ce27c587f569983027 100644 (file)
@@ -293,7 +293,8 @@ struct auxtrace_record {
        int (*recording_options)(struct auxtrace_record *itr,
                                 struct perf_evlist *evlist,
                                 struct record_opts *opts);
-       size_t (*info_priv_size)(struct auxtrace_record *itr);
+       size_t (*info_priv_size)(struct auxtrace_record *itr,
+                                struct perf_evlist *evlist);
        int (*info_fill)(struct auxtrace_record *itr,
                         struct perf_session *session,
                         struct auxtrace_info_event *auxtrace_info,
@@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 int auxtrace_record__options(struct auxtrace_record *itr,
                             struct perf_evlist *evlist,
                             struct record_opts *opts);
-size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+                                      struct perf_evlist *evlist);
 int auxtrace_record__info_fill(struct auxtrace_record *itr,
                               struct perf_session *session,
                               struct auxtrace_info_event *auxtrace_info,
index 540a7efa657ea8668eeddf1b476bbcb3c0490bbc..0967ce601931685ed294827e8aef7c30c47736c6 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/bpf.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include "perf.h"
@@ -16,6 +17,7 @@
 #include "llvm-utils.h"
 #include "probe-event.h"
 #include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
 #include "llvm-utils.h"
 
 #define DEFINE_PRINT_FN(name, level) \
@@ -108,8 +110,8 @@ void bpf__clear(void)
 }
 
 static void
-bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused,
-                    void *_priv)
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+               void *_priv)
 {
        struct bpf_prog_priv *priv = _priv;
 
@@ -337,7 +339,7 @@ config_bpf_program(struct bpf_program *prog)
        }
        pr_debug("bpf: config '%s' is ok\n", config_str);
 
-       err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear);
+       err = bpf_program__set_private(prog, priv, clear_prog_priv);
        if (err) {
                pr_debug("Failed to set priv for program '%s'\n", config_str);
                goto errout;
@@ -739,6 +741,682 @@ int bpf__foreach_tev(struct bpf_object *obj,
        return 0;
 }
 
+enum bpf_map_op_type {
+       BPF_MAP_OP_SET_VALUE,
+       BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+       BPF_MAP_KEY_ALL,
+       BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+       struct list_head list;
+       enum bpf_map_op_type op_type;
+       enum bpf_map_key_type key_type;
+       union {
+               struct parse_events_array array;
+       } k;
+       union {
+               u64 value;
+               struct perf_evsel *evsel;
+       } v;
+};
+
+struct bpf_map_priv {
+       struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+       if (!list_empty(&op->list))
+               list_del(&op->list);
+       if (op->key_type == BPF_MAP_KEY_RANGES)
+               parse_events__clear_array(&op->k.array);
+       free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+       struct bpf_map_op *pos, *n;
+
+       list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+               list_del_init(&pos->list);
+               bpf_map_op__delete(pos);
+       }
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+                   void *_priv)
+{
+       struct bpf_map_priv *priv = _priv;
+
+       bpf_map_priv__purge(priv);
+       free(priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+       op->key_type = BPF_MAP_KEY_ALL;
+       if (!term)
+               return 0;
+
+       if (term->array.nr_ranges) {
+               size_t memsz = term->array.nr_ranges *
+                               sizeof(op->k.array.ranges[0]);
+
+               op->k.array.ranges = memdup(term->array.ranges, memsz);
+               if (!op->k.array.ranges) {
+                       pr_debug("Not enough memory to alloc indices for map\n");
+                       return -ENOMEM;
+               }
+               op->key_type = BPF_MAP_KEY_RANGES;
+               op->k.array.nr_ranges = term->array.nr_ranges;
+       }
+       return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+       struct bpf_map_op *op;
+       int err;
+
+       op = zalloc(sizeof(*op));
+       if (!op) {
+               pr_debug("Failed to alloc bpf_map_op\n");
+               return ERR_PTR(-ENOMEM);
+       }
+       INIT_LIST_HEAD(&op->list);
+
+       err = bpf_map_op_setkey(op, term);
+       if (err) {
+               free(op);
+               return ERR_PTR(err);
+       }
+       return op;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+       struct bpf_map_priv *priv;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+       err = bpf_map__get_private(map, (void **)&priv);
+       if (err) {
+               pr_debug("Failed to get private from map %s\n", map_name);
+               return err;
+       }
+
+       if (!priv) {
+               priv = zalloc(sizeof(*priv));
+               if (!priv) {
+                       pr_debug("Not enough memory to alloc map private\n");
+                       return -ENOMEM;
+               }
+               INIT_LIST_HEAD(&priv->ops_list);
+
+               if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) {
+                       free(priv);
+                       return -BPF_LOADER_ERRNO__INTERNAL;
+               }
+       }
+
+       list_add_tail(&op->list, &priv->ops_list);
+       return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+       struct bpf_map_op *op;
+       int err;
+
+       op = bpf_map_op__new(term);
+       if (IS_ERR(op))
+               return op;
+
+       err = bpf_map__add_op(map, op);
+       if (err) {
+               bpf_map_op__delete(op);
+               return ERR_PTR(err);
+       }
+       return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+                       struct parse_events_term *term)
+{
+       struct bpf_map_def def;
+       struct bpf_map_op *op;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("Unable to get map definition from '%s'\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       if (def.type != BPF_MAP_TYPE_ARRAY) {
+               pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+       }
+       if (def.key_size < sizeof(unsigned int)) {
+               pr_debug("Map %s has incorrect key size\n", map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+       }
+       switch (def.value_size) {
+       case 1:
+       case 2:
+       case 4:
+       case 8:
+               break;
+       default:
+               pr_debug("Map %s has incorrect value size\n", map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+       }
+
+       op = bpf_map__add_newop(map, term);
+       if (IS_ERR(op))
+               return PTR_ERR(op);
+       op->op_type = BPF_MAP_OP_SET_VALUE;
+       op->v.value = term->val.num;
+       return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+                     struct parse_events_term *term,
+                     struct perf_evlist *evlist __maybe_unused)
+{
+       if (!term->err_val) {
+               pr_debug("Config value not set\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+       }
+
+       if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+               pr_debug("ERROR: wrong value type for 'value'\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+       }
+
+       return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+                       struct parse_events_term *term,
+                       struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+       struct bpf_map_def def;
+       struct bpf_map_op *op;
+       const char *map_name;
+       int err;
+
+       map_name = bpf_map__get_name(map);
+       evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+       if (!evsel) {
+               pr_debug("Event (for '%s') '%s' doesn't exist\n",
+                        map_name, term->val.str);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("Unable to get map definition from '%s'\n",
+                        map_name);
+               return err;
+       }
+
+       /*
+        * No need to check key_size and value_size:
+        * kernel has already checked them.
+        */
+       if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+               pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+       }
+
+       op = bpf_map__add_newop(map, term);
+       if (IS_ERR(op))
+               return PTR_ERR(op);
+       op->op_type = BPF_MAP_OP_SET_EVSEL;
+       op->v.evsel = evsel;
+       return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+                     struct parse_events_term *term,
+                     struct perf_evlist *evlist)
+{
+       if (!term->err_val) {
+               pr_debug("Config value not set\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+       }
+
+       if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+               pr_debug("ERROR: wrong value type for 'event'\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+       }
+
+       return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+       const char *config_opt;
+       int (*config_func)(struct bpf_map *, struct parse_events_term *,
+                          struct perf_evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+       {"value", bpf_map__config_value},
+       {"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+                              struct bpf_map *map,
+                              const char *map_name)
+{
+       struct parse_events_array *array = &term->array;
+       struct bpf_map_def def;
+       unsigned int i;
+       int err;
+
+       if (!array->nr_ranges)
+               return 0;
+       if (!array->ranges) {
+               pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+                        map_name, (int)array->nr_ranges);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("ERROR: Unable to get map definition from '%s'\n",
+                        map_name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       for (i = 0; i < array->nr_ranges; i++) {
+               unsigned int start = array->ranges[i].start;
+               size_t length = array->ranges[i].length;
+               unsigned int idx = start + length - 1;
+
+               if (idx >= def.max_entries) {
+                       pr_debug("ERROR: index %d too large\n", idx);
+                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+               }
+       }
+       return 0;
+}
+
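+/*
+ * Parse a "map:" config term.  Illustrative only (the exact user-facing
+ * syntax is defined by the event parser): a term such as
+ * "map:channel.value=42" selects map "channel" and option "value", while
+ * "map:channel.event=<evsel name>" binds an event; an optional index list
+ * in term->array restricts which keys the op applies to.
+ */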
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+                   struct parse_events_term *term,
+                   struct perf_evlist *evlist,
+                   int *key_scan_pos)
+{
+       /* key is "map:<mapname>.<config opt>" */
+       char *map_name = strdup(term->config + sizeof("map:") - 1);
+       struct bpf_map *map;
+       int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+       char *map_opt;
+       size_t i;
+
+       if (!map_name)
+               return -ENOMEM;
+
+       map_opt = strchr(map_name, '.');
+       if (!map_opt) {
+               pr_debug("ERROR: Invalid map config: %s\n", map_name);
+               goto out;
+       }
+
+       *map_opt++ = '\0';
+       if (*map_opt == '\0') {
+               pr_debug("ERROR: Invalid map option: %s\n", term->config);
+               goto out;
+       }
+
+       map = bpf_object__get_map_by_name(obj, map_name);
+       if (!map) {
+               pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+               err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+               goto out;
+       }
+
+       *key_scan_pos += strlen(map_opt);
+       err = config_map_indices_range_check(term, map, map_name);
+       if (err)
+               goto out;
+       *key_scan_pos -= strlen(map_opt);
+
+       for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+               struct bpf_obj_config__map_func *func =
+                               &bpf_obj_config__map_funcs[i];
+
+               if (strcmp(map_opt, func->config_opt) == 0) {
+                       err = func->config_func(map, term, evlist);
+                       goto out;
+               }
+       }
+
+       pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+       err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+       if (!err)
+               *key_scan_pos += strlen(map_opt);
+       free(map_name);
+       return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+                   struct parse_events_term *term,
+                   struct perf_evlist *evlist,
+                   int *error_pos)
+{
+       int key_scan_pos = 0;
+       int err;
+
+       if (!obj || !term || !term->config)
+               return -EINVAL;
+
+       if (!prefixcmp(term->config, "map:")) {
+               key_scan_pos = sizeof("map:") - 1;
+               err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+               goto out;
+       }
+       err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+       if (error_pos)
+               *error_pos = key_scan_pos;
+       return err;
+}
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+                                struct bpf_map_def *pdef,
+                                struct bpf_map_op *op,
+                                void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+                     void *arg, const char *name,
+                     int map_fd, struct bpf_map_def *pdef,
+                     struct bpf_map_op *op)
+{
+       unsigned int i;
+       int err;
+
+       for (i = 0; i < pdef->max_entries; i++) {
+               err = func(name, map_fd, pdef, op, &i, arg);
+               if (err) {
+                       pr_debug("ERROR: failed to insert value to %s[%u]\n",
+                                name, i);
+                       return err;
+               }
+       }
+       return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+                        const char *name, int map_fd,
+                        struct bpf_map_def *pdef,
+                        struct bpf_map_op *op)
+{
+       unsigned int i, j;
+       int err;
+
+       for (i = 0; i < op->k.array.nr_ranges; i++) {
+               unsigned int start = op->k.array.ranges[i].start;
+               size_t length = op->k.array.ranges[i].length;
+
+               for (j = 0; j < length; j++) {
+                       unsigned int idx = start + j;
+
+                       err = func(name, map_fd, pdef, op, &idx, arg);
+                       if (err) {
+                               pr_debug("ERROR: failed to insert value to %s[%u]\n",
+                                        name, idx);
+                               return err;
+                       }
+               }
+       }
+       return 0;
+}
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+                          map_config_func_t func,
+                          void *arg)
+{
+       int err, map_fd;
+       const char *name;
+       struct bpf_map_op *op;
+       struct bpf_map_def def;
+       struct bpf_map_priv *priv;
+
+       name = bpf_map__get_name(map);
+
+       err = bpf_map__get_private(map, (void **)&priv);
+       if (err) {
+               pr_debug("ERROR: failed to get private from map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       if (!priv || list_empty(&priv->ops_list)) {
+               pr_debug("INFO: nothing to config for map %s\n", name);
+               return 0;
+       }
+
+       err = bpf_map__get_def(map, &def);
+       if (err) {
+               pr_debug("ERROR: failed to get definition from map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       map_fd = bpf_map__get_fd(map);
+       if (map_fd < 0) {
+               pr_debug("ERROR: failed to get fd from map %s\n", name);
+               return map_fd;
+       }
+
+       list_for_each_entry(op, &priv->ops_list, list) {
+               switch (def.type) {
+               case BPF_MAP_TYPE_ARRAY:
+               case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+                       switch (op->key_type) {
+                       case BPF_MAP_KEY_ALL:
+                               err = foreach_key_array_all(func, arg, name,
+                                                           map_fd, &def, op);
+                               break;
+                       case BPF_MAP_KEY_RANGES:
+                               err = foreach_key_array_ranges(func, arg, name,
+                                                              map_fd, &def,
+                                                              op);
+                               break;
+                       default:
+                               pr_debug("ERROR: keytype for map '%s' invalid\n",
+                                        name);
+                               return -BPF_LOADER_ERRNO__INTERNAL;
+                       }
+                       if (err)
+                               return err;
+                       break;
+               default:
+                       pr_debug("ERROR: type of '%s' incorrect\n", name);
+                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+               }
+       }
+
+       return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+                          size_t val_size, u64 val)
+{
+       int err = 0;
+
+       switch (val_size) {
+       case 1: {
+               u8 _val = (u8)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 2: {
+               u16 _val = (u16)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 4: {
+               u32 _val = (u32)(val);
+               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+               break;
+       }
+       case 8: {
+               err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+               break;
+       }
+       default:
+               pr_debug("ERROR: invalid value size\n");
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+       }
+       if (err && errno)
+               err = -errno;
+       return err;
+}
+
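+/*
+ * Setting an event into a BPF_MAP_TYPE_PERF_EVENT_ARRAY slot means writing
+ * that evsel's perf event fd into the map, so a BPF program can emit
+ * samples to it (e.g. via bpf_perf_event_output()).  That needs exactly
+ * one fd per evsel, hence the dimension and inherit checks below.
+ */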
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+                          struct perf_evsel *evsel)
+{
+       struct xyarray *xy = evsel->fd;
+       struct perf_event_attr *attr;
+       unsigned int key, events;
+       bool check_pass = false;
+       int *evt_fd;
+       int err;
+
+       if (!xy) {
+               pr_debug("ERROR: evsel not ready for map %s\n", name);
+               return -BPF_LOADER_ERRNO__INTERNAL;
+       }
+
+       if (xy->row_size / xy->entry_size != 1) {
+               pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+                        name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+       }
+
+       attr = &evsel->attr;
+       if (attr->inherit) {
+               pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+       }
+
+       if (perf_evsel__is_bpf_output(evsel))
+               check_pass = true;
+       if (attr->type == PERF_TYPE_RAW)
+               check_pass = true;
+       if (attr->type == PERF_TYPE_HARDWARE)
+               check_pass = true;
+       if (!check_pass) {
+               pr_debug("ERROR: Event type is wrong for map %s\n", name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+       }
+
+       events = xy->entries / (xy->row_size / xy->entry_size);
+       key = *((unsigned int *)pkey);
+       if (key >= events) {
+               pr_debug("ERROR: there is no event %d for map %s\n",
+                        key, name);
+               return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+       }
+       evt_fd = xyarray__entry(xy, key, 0);
+       err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+       if (err && errno)
+               err = -errno;
+       return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+                            struct bpf_map_def *pdef __maybe_unused,
+                            struct bpf_map_op *op,
+                            void *pkey, void *arg __maybe_unused)
+{
+       int err;
+
+       switch (op->op_type) {
+       case BPF_MAP_OP_SET_VALUE:
+               err = apply_config_value_for_key(map_fd, pkey,
+                                                pdef->value_size,
+                                                op->v.value);
+               break;
+       case BPF_MAP_OP_SET_EVSEL:
+               err = apply_config_evsel_for_key(name, map_fd, pkey,
+                                                op->v.evsel);
+               break;
+       default:
+               pr_debug("ERROR: unknown map op type for '%s'\n", name);
+               err = -BPF_LOADER_ERRNO__INTERNAL;
+       }
+       return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+       return bpf_map_config_foreach_key(map,
+                                         apply_obj_config_map_for_key,
+                                         NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+       struct bpf_map *map;
+       int err;
+
+       bpf_map__for_each(map, obj) {
+               err = apply_obj_config_map(map);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
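+/*
+ * Flush the queued map ops of every loaded BPF object into the kernel
+ * through bpf_map_update_elem().  This must run after the maps exist and
+ * the target events have been opened (evsel->fd populated); otherwise
+ * evsel ops fail with BPF_LOADER_ERRNO__INTERNAL.
+ */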
+int bpf__apply_obj_config(void)
+{
+       struct bpf_object *obj, *tmp;
+       int err;
+
+       bpf_object__for_each_safe(obj, tmp) {
+               err = apply_obj_config_object(obj);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
 #define ERRNO_OFFSET(e)                ((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)      ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO       (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -753,6 +1431,20 @@ static const char *bpf_loader_strerror_table[NR_ERRNO] = {
        [ERRCODE_OFFSET(PROLOGUE)]      = "Failed to generate prologue",
        [ERRCODE_OFFSET(PROLOGUE2BIG)]  = "Prologue too big for program",
        [ERRCODE_OFFSET(PROLOGUEOOB)]   = "Offset out of bound for prologue",
+       [ERRCODE_OFFSET(OBJCONF_OPT)]   = "Invalid object config option",
+       [ERRCODE_OFFSET(OBJCONF_CONF)]  = "Config value not set (missing '=')",
+       [ERRCODE_OFFSET(OBJCONF_MAP_OPT)]       = "Invalid object map config option",
+       [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]  = "Target map doesn't exist",
+       [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]     = "Incorrect value type for map",
+       [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]      = "Incorrect map type",
+       [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]   = "Incorrect map key size",
+       [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
+       [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]     = "Event not found for map setting",
+       [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]   = "Invalid map size for event setting",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]    = "Event dimension too large",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]    = "Inherit event is not supported",
+       [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]   = "Wrong event type for map",
+       [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]   = "Index too large",
 };
 
 static int
@@ -872,3 +1564,29 @@ int bpf__strerror_load(struct bpf_object *obj,
        bpf__strerror_end(buf, size);
        return 0;
 }
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+                            struct parse_events_term *term __maybe_unused,
+                            struct perf_evlist *evlist __maybe_unused,
+                            int *error_pos __maybe_unused, int err,
+                            char *buf, size_t size)
+{
+       bpf__strerror_head(err, buf, size);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+                           "Can't use this config term with this map type");
+       bpf__strerror_end(buf, size);
+       return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+       bpf__strerror_head(err, buf, size);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+                           "Cannot set event to BPF map in multi-thread tracing");
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+                           "%s (Hint: use -i to turn off inherit)", emsg);
+       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+                           "Can only put raw, hardware and BPF output event into a BPF map");
+       bpf__strerror_end(buf, size);
+       return 0;
+}
index 6fdc0457e2b66ea3fedd8e26c4a0e8f6a4bcc6e2..be4311944e3daa2abc87cdd54be72ed7bce70682 100644 (file)
@@ -10,6 +10,7 @@
 #include <string.h>
 #include <bpf/libbpf.h>
 #include "probe-event.h"
+#include "evlist.h"
 #include "debug.h"
 
 enum bpf_loader_errno {
@@ -24,10 +25,25 @@ enum bpf_loader_errno {
        BPF_LOADER_ERRNO__PROLOGUE,     /* Failed to generate prologue */
        BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
        BPF_LOADER_ERRNO__PROLOGUEOOB,  /* Offset out of bound for prologue */
+       BPF_LOADER_ERRNO__OBJCONF_OPT,  /* Invalid object config option */
+       BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (missing '=') */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,      /* Invalid object map config option */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map doesn't exist */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,    /* Incorrect value type for map */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,     /* Incorrect map type */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,  /* Incorrect map key size */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,    /* Event not found for map setting */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,  /* Invalid map size for event setting */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,   /* Event dimension too large */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,   /* Inherit event is not supported */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,  /* Wrong event type for map */
+       BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,  /* Index too large */
        __BPF_LOADER_ERRNO__END,
 };
 
 struct bpf_object;
+struct parse_events_term;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
 typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
@@ -53,6 +69,16 @@ int bpf__strerror_load(struct bpf_object *obj, int err,
                       char *buf, size_t size);
 int bpf__foreach_tev(struct bpf_object *obj,
                     bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+                   struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+                            struct parse_events_term *term,
+                            struct perf_evlist *evlist,
+                            int *error_pos, int err, char *buf,
+                            size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -83,6 +109,21 @@ bpf__foreach_tev(struct bpf_object *obj __maybe_unused,
        return 0;
 }
 
+static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+               struct parse_events_term *term __maybe_unused,
+               struct perf_evlist *evlist __maybe_unused,
+               int *error_pos __maybe_unused)
+{
+       return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+       return 0;
+}
+
 static inline int
 __bpf_strerror(char *buf, size_t size)
 {
@@ -118,5 +159,23 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
 {
        return __bpf_strerror(buf, size);
 }
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+                        struct parse_events_term *term __maybe_unused,
+                        struct perf_evlist *evlist __maybe_unused,
+                        int *error_pos __maybe_unused,
+                        int err __maybe_unused,
+                        char *buf, size_t size)
+{
+       return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+                              char *buf, size_t size)
+{
+       return __bpf_strerror(buf, size);
+}
 #endif
 #endif
index 6a7e273a514a642b30a477c3119696dc7fa09975..f1479eeef7daf9ae2c386abcef7d079a0370c836 100644 (file)
@@ -166,6 +166,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
        return build_id__filename(build_id_hex, bf, size);
 }
 
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
+{
+       char *id_name, *ch;
+       struct stat sb;
+
+       id_name = dso__build_id_filename(dso, bf, size);
+       if (!id_name)
+               goto err;
+       if (access(id_name, F_OK))
+               goto err;
+       if (lstat(id_name, &sb) == -1)
+               goto err;
+       if ((size_t)sb.st_size > size - 1)
+               goto err;
+       if (readlink(id_name, bf, size - 1) < 0)
+               goto err;
+
+       bf[sb.st_size] = '\0';
+
+       /*
+        * link should be:
+        * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
+        */
+       ch = strrchr(bf, '/');
+       if (!ch)
+               goto err;
+       if (ch - 3 < bf)
+               goto err;
+
+       return strncmp(".ko", ch - 3, 3) == 0;
+err:
+       /*
+        * If dso__build_id_filename() worked, regenerate id_name,
+        * because id_name points into bf, which has since been overwritten.
+        */
+       if (id_name)
+               id_name = dso__build_id_filename(dso, bf, size);
+       pr_err("Invalid build id: %s\n", id_name ? :
+                                        dso->long_name ? :
+                                        dso->short_name ? :
+                                        "[unknown]");
+       return false;
+}
+
 #define dsos__for_each_with_build_id(pos, head)        \
        list_for_each_entry(pos, head, node)    \
                if (!pos->has_build_id)         \
@@ -211,6 +255,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
        dsos__for_each_with_build_id(pos, &machine->dsos.head) {
                const char *name;
                size_t name_len;
+               bool in_kernel = false;
 
                if (!pos->hit)
                        continue;
@@ -227,8 +272,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
                        name_len = pos->long_name_len + 1;
                }
 
+               in_kernel = pos->kernel ||
+                               is_kernel_module(name,
+                                       PERF_RECORD_MISC_CPUMODE_UNKNOWN);
                err = write_buildid(name, name_len, pos->build_id, machine->pid,
-                                   pos->kernel ? kmisc : umisc, fd);
+                                   in_kernel ? kmisc : umisc, fd);
                if (err)
                        break;
        }
index 27a14a8a945beb8eec9cc389ca7d6545b44a2ac4..64af3e20610d7718ecacfad3810fc4bff4ee882d 100644 (file)
@@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
                           struct perf_sample *sample, struct perf_evsel *evsel,
index 07b5d63947b11ec5f70029c6e41266e56b2bd8aa..3ca453f0c51f6f5b269f84270e78132e45f9768c 100644 (file)
@@ -23,6 +23,8 @@
 #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 #define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
 
+extern const char *config_exclusive_filename;
+
 typedef int (*config_fn_t)(const char *, const char *, void *);
 extern int perf_default_config(const char *, const char *, void *);
 extern int perf_config(config_fn_t fn, void *);
@@ -31,6 +33,7 @@ extern u64 perf_config_u64(const char *, const char *);
 extern int perf_config_bool(const char *, const char *);
 extern int config_error_nonbool(const char *);
 extern const char *perf_config_dirname(const char *, const char *);
+extern const char *perf_etc_perfconfig(void);
 
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
index 53c43eb9489e4ba4a4e56a795bc05c2b85fb4cbc..24b4bd0d77545e7bb9f95e83222eb103c92f5151 100644 (file)
@@ -416,7 +416,7 @@ create_child(struct callchain_node *parent, bool inherit_children)
 /*
  * Fill the node with callchain values
  */
-static void
+static int
 fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 {
        struct callchain_cursor_node *cursor_node;
@@ -433,7 +433,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                call = zalloc(sizeof(*call));
                if (!call) {
                        perror("not enough memory for the code path tree");
-                       return;
+                       return -1;
                }
                call->ip = cursor_node->ip;
                call->ms.sym = cursor_node->sym;
@@ -443,6 +443,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                callchain_cursor_advance(cursor);
                cursor_node = callchain_cursor_current(cursor);
        }
+       return 0;
 }
 
 static struct callchain_node *
@@ -453,7 +454,19 @@ add_child(struct callchain_node *parent,
        struct callchain_node *new;
 
        new = create_child(parent, false);
-       fill_node(new, cursor);
+       if (new == NULL)
+               return NULL;
+
+       if (fill_node(new, cursor) < 0) {
+               struct callchain_list *call, *tmp;
+
+               list_for_each_entry_safe(call, tmp, &new->val, list) {
+                       list_del(&call->list);
+                       free(call);
+               }
+               free(new);
+               return NULL;
+       }
 
        new->children_hit = 0;
        new->hit = period;
@@ -462,16 +475,32 @@ add_child(struct callchain_node *parent,
        return new;
 }
 
-static s64 match_chain(struct callchain_cursor_node *node,
-                     struct callchain_list *cnode)
+enum match_result {
+       MATCH_ERROR  = -1,
+       MATCH_EQ,
+       MATCH_LT,
+       MATCH_GT,
+};
+
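+/*
+ * match_chain() now reports an explicit ordering (plus an error state)
+ * instead of a raw s64 difference; comparing the u64 addresses directly
+ * also avoids sign flips when their difference does not fit in an s64.
+ */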
+static enum match_result match_chain(struct callchain_cursor_node *node,
+                                    struct callchain_list *cnode)
 {
        struct symbol *sym = node->sym;
+       u64 left, right;
 
        if (cnode->ms.sym && sym &&
-           callchain_param.key == CCKEY_FUNCTION)
-               return cnode->ms.sym->start - sym->start;
-       else
-               return cnode->ip - node->ip;
+           callchain_param.key == CCKEY_FUNCTION) {
+               left = cnode->ms.sym->start;
+               right = sym->start;
+       } else {
+               left = cnode->ip;
+               right = node->ip;
+       }
+
+       if (left == right)
+               return MATCH_EQ;
+
+       return left > right ? MATCH_GT : MATCH_LT;
 }
 
 /*
@@ -479,7 +508,7 @@ static s64 match_chain(struct callchain_cursor_node *node,
  * give a part of its callchain to the created child.
  * Then create another child to host the given callchain of new branch
  */
-static void
+static int
 split_add_child(struct callchain_node *parent,
                struct callchain_cursor *cursor,
                struct callchain_list *to_split,
@@ -491,6 +520,8 @@ split_add_child(struct callchain_node *parent,
 
        /* split */
        new = create_child(parent, true);
+       if (new == NULL)
+               return -1;
 
        /* split the callchain and move a part to the new child */
        old_tail = parent->val.prev;
@@ -524,6 +555,8 @@ split_add_child(struct callchain_node *parent,
 
                node = callchain_cursor_current(cursor);
                new = add_child(parent, cursor, period);
+               if (new == NULL)
+                       return -1;
 
                /*
                 * This is second child since we moved parent's children
@@ -534,7 +567,7 @@ split_add_child(struct callchain_node *parent,
                cnode = list_first_entry(&first->val, struct callchain_list,
                                         list);
 
-               if (match_chain(node, cnode) < 0)
+               if (match_chain(node, cnode) == MATCH_LT)
                        pp = &p->rb_left;
                else
                        pp = &p->rb_right;
@@ -545,14 +578,15 @@ split_add_child(struct callchain_node *parent,
                parent->hit = period;
                parent->count = 1;
        }
+       return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
             struct callchain_cursor *cursor,
             u64 period);
 
-static void
+static int
 append_chain_children(struct callchain_node *root,
                      struct callchain_cursor *cursor,
                      u64 period)
@@ -564,36 +598,42 @@ append_chain_children(struct callchain_node *root,
 
        node = callchain_cursor_current(cursor);
        if (!node)
-               return;
+               return -1;
 
        /* lookup in children */
        while (*p) {
-               s64 ret;
+               enum match_result ret;
 
                parent = *p;
                rnode = rb_entry(parent, struct callchain_node, rb_node_in);
 
                /* If at least first entry matches, rely to children */
                ret = append_chain(rnode, cursor, period);
-               if (ret == 0)
+               if (ret == MATCH_EQ)
                        goto inc_children_hit;
+               if (ret == MATCH_ERROR)
+                       return -1;
 
-               if (ret < 0)
+               if (ret == MATCH_LT)
                        p = &parent->rb_left;
                else
                        p = &parent->rb_right;
        }
        /* nothing in children, add to the current node */
        rnode = add_child(root, cursor, period);
+       if (rnode == NULL)
+               return -1;
+
        rb_link_node(&rnode->rb_node_in, parent, p);
        rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
 
 inc_children_hit:
        root->children_hit += period;
        root->children_count++;
+       return 0;
 }
 
-static int
+static enum match_result
 append_chain(struct callchain_node *root,
             struct callchain_cursor *cursor,
             u64 period)
@@ -602,7 +642,7 @@ append_chain(struct callchain_node *root,
        u64 start = cursor->pos;
        bool found = false;
        u64 matches;
-       int cmp = 0;
+       enum match_result cmp = MATCH_ERROR;
 
        /*
         * Lookup in the current node
@@ -618,7 +658,7 @@ append_chain(struct callchain_node *root,
                        break;
 
                cmp = match_chain(node, cnode);
-               if (cmp)
+               if (cmp != MATCH_EQ)
                        break;
 
                found = true;
@@ -628,7 +668,7 @@ append_chain(struct callchain_node *root,
 
        /* no match: relay to the parent */
        if (!found) {
-               WARN_ONCE(!cmp, "Chain comparison error\n");
+               WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
                return cmp;
        }
 
@@ -636,21 +676,25 @@ append_chain(struct callchain_node *root,
 
        /* we match only a part of the node. Split it and add the new chain */
        if (matches < root->val_nr) {
-               split_add_child(root, cursor, cnode, start, matches, period);
-               return 0;
+               if (split_add_child(root, cursor, cnode, start, matches,
+                                   period) < 0)
+                       return MATCH_ERROR;
+
+               return MATCH_EQ;
        }
 
        /* we match 100% of the path, increment the hit */
        if (matches == root->val_nr && cursor->pos == cursor->nr) {
                root->hit += period;
                root->count++;
-               return 0;
+               return MATCH_EQ;
        }
 
        /* We match the node and still have a part remaining */
-       append_chain_children(root, cursor, period);
+       if (append_chain_children(root, cursor, period) < 0)
+               return MATCH_ERROR;
 
-       return 0;
+       return MATCH_EQ;
 }
 
 int callchain_append(struct callchain_root *root,
@@ -662,7 +706,8 @@ int callchain_append(struct callchain_root *root,
 
        callchain_cursor_commit(cursor);
 
-       append_chain_children(&root->node, cursor, period);
+       if (append_chain_children(&root->node, cursor, period) < 0)
+               return -1;
 
        if (cursor->nr > root->max_depth)
                root->max_depth = cursor->nr;
@@ -690,7 +735,8 @@ merge_chain_branch(struct callchain_cursor *cursor,
 
        if (src->hit) {
                callchain_cursor_commit(cursor);
-               append_chain_children(dst, cursor, src->hit);
+               if (append_chain_children(dst, cursor, src->hit) < 0)
+                       return -1;
        }
 
        n = rb_first(&src->rb_root_in);
index e5fb88bab9e1c416a3c53fe5036f8eb57f64a55d..43e84aa27e4a6489eafdebc3636d8bfb33334e3f 100644 (file)
@@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
        return 0;
 }
 
-int perf_color_default_config(const char *var, const char *value, void *cb)
+int perf_color_default_config(const char *var, const char *value,
+                             void *cb __maybe_unused)
 {
        if (!strcmp(var, "color.ui")) {
                perf_use_color_default = perf_config_colorbool(var, value, -1);
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int __color_vsnprintf(char *bf, size_t size, const char *color,
index d3e12e30e1d520f8073d1f01d17e1eaf4ac30254..4e727635476eadf5b105a7be4620f86a4bf46499 100644 (file)
@@ -26,7 +26,7 @@ static const char *config_file_name;
 static int config_linenr;
 static int config_file_eof;
 
-static const char *config_exclusive_filename;
+const char *config_exclusive_filename;
 
 static int get_next_char(void)
 {
@@ -434,7 +434,7 @@ static int perf_config_from_file(config_fn_t fn, const char *filename, void *dat
        return ret;
 }
 
-static const char *perf_etc_perfconfig(void)
+const char *perf_etc_perfconfig(void)
 {
        static const char *system_wide;
        if (!system_wide)
index fa935093a599429011fa214c24645fd0230e3b3a..9bcf2bed3a6d1b7369ee4deee7f38e9c4abab06d 100644 (file)
@@ -8,6 +8,10 @@
 #include <linux/bitmap.h>
 #include "asm/bug.h"
 
+static int max_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
 static struct cpu_map *cpu_map__default_new(void)
 {
        struct cpu_map *cpus;
@@ -486,6 +490,32 @@ out:
                pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
 }
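+/*
+ * The accessors below initialize the max CPU/node numbers lazily on first
+ * use instead of relying on the caller having run the setup beforehand
+ * (the old header inlines merely warned when uninitialized).
+ */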
 
+int cpu__max_node(void)
+{
+       if (unlikely(!max_node_num))
+               set_max_node_num();
+
+       return max_node_num;
+}
+
+int cpu__max_cpu(void)
+{
+       if (unlikely(!max_cpu_num))
+               set_max_cpu_num();
+
+       return max_cpu_num;
+}
+
+int cpu__get_node(int cpu)
+{
+       if (unlikely(cpunode_map == NULL)) {
+               pr_debug("cpu_map not initialized\n");
+               return -1;
+       }
+
+       return cpunode_map[cpu];
+}
+
 static int init_cpunode_map(void)
 {
        int i;
index 71c41b9efabb3b38dd17a7374413985dfd944f77..81a2562aaa2b02261b88c960997238dc9e0925ab 100644 (file)
@@ -57,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map)
        return map ? map->map[0] == -1 : true;
 }
 
-int max_cpu_num;
-int max_node_num;
-int *cpunode_map;
-
 int cpu__setup_cpunode_map(void);
 
-static inline int cpu__max_node(void)
-{
-       if (unlikely(!max_node_num))
-               pr_debug("cpu_map not initialized\n");
-
-       return max_node_num;
-}
-
-static inline int cpu__max_cpu(void)
-{
-       if (unlikely(!max_cpu_num))
-               pr_debug("cpu_map not initialized\n");
-
-       return max_cpu_num;
-}
-
-static inline int cpu__get_node(int cpu)
-{
-       if (unlikely(cpunode_map == NULL)) {
-               pr_debug("cpu_map not initialized\n");
-               return -1;
-       }
-
-       return cpunode_map[cpu];
-}
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
                       int (*f)(struct cpu_map *map, int cpu, void *data),
index aada3ac5e891f85be1f4d25791d4ee17dd859077..d4a5a21c2a7e2e47596444665b5f5828874da337 100644 (file)
@@ -31,9 +31,18 @@ unsigned char sane_ctype[256] = {
 };
 
 const char *graph_line =
+       "_____________________________________________________________________"
        "_____________________________________________________________________"
        "_____________________________________________________________________";
 const char *graph_dotted_line =
        "---------------------------------------------------------------------"
        "---------------------------------------------------------------------"
        "---------------------------------------------------------------------";
+const char *spaces =
+       "                                                                     "
+       "                                                                     "
+       "                                                                     ";
+const char *dots =
+       "....................................................................."
+       "....................................................................."
+       ".....................................................................";
index 34cd1e4039d35e05be0460ff0a259e7d8b2bb80a..811af89ce0bb8f37b99898dc8b2a2d5f6e183d26 100644 (file)
@@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw,
        return ret;
 }
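+/*
+ * A BPF output event carries its raw payload into CTF as a u32 "raw_len"
+ * count plus a "raw_data" sequence of u32 elements printed as hex; see
+ * add_bpf_output_types() further down, which declares those two fields.
+ */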
 
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+                     struct bt_ctf_event *event,
+                     struct perf_sample *sample)
+{
+       struct bt_ctf_field_type *len_type, *seq_type;
+       struct bt_ctf_field *len_field, *seq_field;
+       unsigned int raw_size = sample->raw_size;
+       unsigned int nr_elements = raw_size / sizeof(u32);
+       unsigned int i;
+       int ret;
+
+       if (nr_elements * sizeof(u32) != raw_size)
+               pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+                          raw_size, raw_size - nr_elements * sizeof(u32));
+
+       len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+       len_field = bt_ctf_field_create(len_type);
+       if (!len_field) {
+               pr_err("failed to create 'raw_len' for bpf output event\n");
+               ret = -1;
+               goto put_len_type;
+       }
+
+       ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+       if (ret) {
+               pr_err("failed to set field value for raw_len\n");
+               goto put_len_field;
+       }
+       ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+       if (ret) {
+               pr_err("failed to set payload to raw_len\n");
+               goto put_len_field;
+       }
+
+       seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+       seq_field = bt_ctf_field_create(seq_type);
+       if (!seq_field) {
+               pr_err("failed to create 'raw_data' for bpf output event\n");
+               ret = -1;
+               goto put_seq_type;
+       }
+
+       ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+       if (ret) {
+               pr_err("failed to set length of 'raw_data'\n");
+               goto put_seq_field;
+       }
+
+       for (i = 0; i < nr_elements; i++) {
+               struct bt_ctf_field *elem_field =
+                       bt_ctf_field_sequence_get_field(seq_field, i);
+
+               ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+                               ((u32 *)(sample->raw_data))[i]);
+
+               bt_ctf_field_put(elem_field);
+               if (ret) {
+                       pr_err("failed to set raw_data[%d]\n", i);
+                       goto put_seq_field;
+               }
+       }
+
+       ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+       if (ret)
+               pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+       bt_ctf_field_put(seq_field);
+put_seq_type:
+       bt_ctf_field_type_put(seq_type);
+put_len_field:
+       bt_ctf_field_put(len_field);
+put_len_type:
+       bt_ctf_field_type_put(len_type);
+       return ret;
+}
+
 static int add_generic_values(struct ctf_writer *cw,
                              struct bt_ctf_event *event,
                              struct perf_evsel *evsel,
@@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool,
                        return -1;
        }
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               ret = add_bpf_output_values(event_class, event, sample);
+               if (ret)
+                       return -1;
+       }
+
        cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
        if (cs) {
                if (is_flush_needed(cs))
@@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw,
        return ret;
 }
 
+static int add_bpf_output_types(struct ctf_writer *cw,
+                               struct bt_ctf_event_class *class)
+{
+       struct bt_ctf_field_type *len_type = cw->data.u32;
+       struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+       struct bt_ctf_field_type *seq_type;
+       int ret;
+
+       ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+       if (ret)
+               return ret;
+
+       seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+       if (!seq_type)
+               return -1;
+
+       return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
 static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
                             struct bt_ctf_event_class *event_class)
 {
@@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
         *                              ctf event header
         *   PERF_SAMPLE_READ         - TODO
         *   PERF_SAMPLE_CALLCHAIN    - TODO
-        *   PERF_SAMPLE_RAW          - tracepoint fields are handled separately
+        *   PERF_SAMPLE_RAW          - tracepoint fields and BPF output
+        *                              are handled separately
         *   PERF_SAMPLE_BRANCH_STACK - TODO
         *   PERF_SAMPLE_REGS_USER    - TODO
         *   PERF_SAMPLE_STACK_USER   - TODO
@@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
                        goto err;
        }
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               ret = add_bpf_output_types(cw, event_class);
+               if (ret)
+                       goto err;
+       }
+
        ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
        if (ret) {
                pr("Failed to add event class into stream.\n");
@@ -858,6 +968,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
        return 0;
 }
 
+static void cleanup_events(struct perf_session *session)
+{
+       struct perf_evlist *evlist = session->evlist;
+       struct perf_evsel *evsel;
+
+       evlist__for_each(evlist, evsel) {
+               struct evsel_priv *priv;
+
+               priv = evsel->priv;
+               bt_ctf_event_class_put(priv->event_class);
+               zfree(&evsel->priv);
+       }
+
+       perf_evlist__delete(evlist);
+       session->evlist = NULL;
+}
+
 static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
 {
        struct ctf_stream **stream;
@@ -953,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
            bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
                goto err;
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+       bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+       bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
        pr2("Created type: INTEGER %d-bit %ssigned %s\n",
            size, sign ? "un" : "", hex ? "hex" : "");
        return type;
@@ -1100,7 +1233,7 @@ static int convert__config(const char *var, const char *value, void *cb)
                return 0;
        }
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
@@ -1171,6 +1304,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
                (double) c.events_size / 1024.0 / 1024.0,
                c.events_count);
 
+       cleanup_events(session);
        perf_session__delete(session);
        ctf_writer__cleanup(cw);
 
index 86d9c73025983d0132a4dddc8607161d713bfbf2..8c4212abd19b48b9e84ca1f9978f06561c05576a 100644 (file)
@@ -5,6 +5,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <api/debug.h>
 
 #include "cache.h"
 #include "color.h"
@@ -22,7 +23,7 @@ int debug_ordered_events;
 static int redirect_to_stderr;
 int debug_data_convert;
 
-static int _eprintf(int level, int var, const char *fmt, va_list args)
+int veprintf(int level, int var, const char *fmt, va_list args)
 {
        int ret = 0;
 
@@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args)
        return ret;
 }
 
-int veprintf(int level, int var, const char *fmt, va_list args)
-{
-       return _eprintf(level, var, fmt, args);
-}
-
 int eprintf(int level, int var, const char *fmt, ...)
 {
        va_list args;
        int ret;
 
        va_start(args, fmt);
-       ret = _eprintf(level, var, fmt, args);
+       ret = veprintf(level, var, fmt, args);
        va_end(args);
 
        return ret;
 }
 
-static int __eprintf_time(u64 t, const char *fmt, va_list args)
+static int veprintf_time(u64 t, const char *fmt, va_list args)
 {
        int ret = 0;
        u64 secs, usecs, nsecs = t;
@@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
 
        if (var >= level) {
                va_start(args, fmt);
-               ret = __eprintf_time(t, fmt, args);
+               ret = veprintf_time(t, fmt, args);
                va_end(args);
        }
 
@@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...)
        va_list args;
 
        va_start(args, fmt);
-       _eprintf(1, verbose, fmt, args);
+       veprintf(1, verbose, fmt, args);
        va_end(args);
        eprintf(1, verbose, "\n");
 }
@@ -110,40 +106,61 @@ int dump_printf(const char *fmt, ...)
        return ret;
 }
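+/*
+ * Callback for print_binary(): it is invoked once per formatting step
+ * (line begin/end, address, hex byte, padding, printable character) and
+ * reproduces the raw-event hexdump that the open-coded loop removed below
+ * used to emit, 16 bytes per line as requested from trace_event().
+ */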
 
+static void trace_event_printer(enum binary_printer_ops op,
+                               unsigned int val, void *extra)
+{
+       const char *color = PERF_COLOR_BLUE;
+       union perf_event *event = (union perf_event *)extra;
+       unsigned char ch = (unsigned char)val;
+
+       switch (op) {
+       case BINARY_PRINT_DATA_BEGIN:
+               printf(".");
+               color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+                               event->header.size);
+               break;
+       case BINARY_PRINT_LINE_BEGIN:
+               printf(".");
+               break;
+       case BINARY_PRINT_ADDR:
+               color_fprintf(stdout, color, "  %04x: ", val);
+               break;
+       case BINARY_PRINT_NUM_DATA:
+               color_fprintf(stdout, color, " %02x", val);
+               break;
+       case BINARY_PRINT_NUM_PAD:
+               color_fprintf(stdout, color, "   ");
+               break;
+       case BINARY_PRINT_SEP:
+               color_fprintf(stdout, color, "  ");
+               break;
+       case BINARY_PRINT_CHAR_DATA:
+               color_fprintf(stdout, color, "%c",
+                             isprint(ch) ? ch : '.');
+               break;
+       case BINARY_PRINT_CHAR_PAD:
+               color_fprintf(stdout, color, " ");
+               break;
+       case BINARY_PRINT_LINE_END:
+               color_fprintf(stdout, color, "\n");
+               break;
+       case BINARY_PRINT_DATA_END:
+               printf("\n");
+               break;
+       default:
+               break;
+       }
+}
+
 void trace_event(union perf_event *event)
 {
        unsigned char *raw_event = (void *)event;
-       const char *color = PERF_COLOR_BLUE;
-       int i, j;
 
        if (!dump_trace)
                return;
 
-       printf(".");
-       color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
-                     event->header.size);
-
-       for (i = 0; i < event->header.size; i++) {
-               if ((i & 15) == 0) {
-                       printf(".");
-                       color_fprintf(stdout, color, "  %04x: ", i);
-               }
-
-               color_fprintf(stdout, color, " %02x", raw_event[i]);
-
-               if (((i & 15) == 15) || i == event->header.size-1) {
-                       color_fprintf(stdout, color, "  ");
-                       for (j = 0; j < 15-(i & 15); j++)
-                               color_fprintf(stdout, color, "   ");
-                       for (j = i & ~15; j <= i; j++) {
-                               color_fprintf(stdout, color, "%c",
-                                             isprint(raw_event[j]) ?
-                                             raw_event[j] : '.');
-                       }
-                       color_fprintf(stdout, color, "\n");
-               }
-       }
-       printf(".\n");
+       print_binary(raw_event, event->header.size, 16,
+                    trace_event_printer, event);
 }
 
 static struct debug_variable {
@@ -192,3 +209,23 @@ int perf_debug_option(const char *str)
        free(s);
        return 0;
 }
+
+#define DEBUG_WRAPPER(__n, __l)                                \
+static int pr_ ## __n ## _wrapper(const char *fmt, ...)        \
+{                                                      \
+       va_list args;                                   \
+       int ret;                                        \
+                                                       \
+       va_start(args, fmt);                            \
+       ret = veprintf(__l, verbose, fmt, args);        \
+       va_end(args);                                   \
+       return ret;                                     \
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+       libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
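
The debug.c change above replaces the open-coded hex dump in trace_event() with a callback-driven printer. A self-contained sketch of the same pattern follows; the op enum, dump_bytes() helper and callback are illustrative stand-ins, not the perf-internal print_binary() API.

/* Standalone sketch of a callback-driven hex dump. */
#include <ctype.h>
#include <stdio.h>

enum dump_op { DUMP_ADDR, DUMP_BYTE, DUMP_SEP, DUMP_CHAR, DUMP_LINE_END };

typedef void (*dump_cb)(enum dump_op op, unsigned int val, void *extra);

static void dump_bytes(const unsigned char *buf, size_t len,
		       size_t per_line, dump_cb cb, void *extra)
{
	size_t i, j;

	for (i = 0; i < len; i += per_line) {
		cb(DUMP_ADDR, (unsigned int)i, extra);
		for (j = i; j < i + per_line && j < len; j++)
			cb(DUMP_BYTE, buf[j], extra);	/* hex column */
		cb(DUMP_SEP, 0, extra);
		for (j = i; j < i + per_line && j < len; j++)
			cb(DUMP_CHAR, buf[j], extra);	/* ASCII column */
		cb(DUMP_LINE_END, 0, extra);
	}
}

static void stdout_cb(enum dump_op op, unsigned int val, void *extra)
{
	(void)extra;

	switch (op) {
	case DUMP_ADDR:     printf("  %04x: ", val);                break;
	case DUMP_BYTE:     printf(" %02x", val);                   break;
	case DUMP_SEP:      printf("  ");                           break;
	case DUMP_CHAR:     printf("%c", isprint(val) ? val : '.'); break;
	case DUMP_LINE_END: printf("\n");                           break;
	}
}

int main(void)
{
	static const unsigned char data[] = "raw event payload";

	dump_bytes(data, sizeof(data), 16, stdout_cb, NULL);
	return 0;
}
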
index 8b9a088c32ab4e330ece47ae91397bdd87bdd0ee..14bafda79edaeba1bbd3e007b5faa8607d0570bf 100644 (file)
@@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__(
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void perf_debug_setup(void);
 
 #endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644 (file)
index 0000000..3e6062a
--- /dev/null
@@ -0,0 +1,199 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+enum {
+       MODE_PREFIX = 0,
+       MODE_CLASS  = 1,
+       MODE_FUNC   = 2,
+       MODE_TYPE   = 3,
+       MODE_CTYPE  = 3, /* class arg */
+};
+
+#define BASE_ENT(c, n) [c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+       BASE_ENT('B', "byte" ),
+       BASE_ENT('C', "char" ),
+       BASE_ENT('D', "double" ),
+       BASE_ENT('F', "float" ),
+       BASE_ENT('I', "int" ),
+       BASE_ENT('J', "long" ),
+       BASE_ENT('S', "short" ),
+       BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * Demangle a Java symbol between the str and end positions and store
+ * up to maxlen characters into buf. The parser starts in the given mode.
+ *
+ * Use MODE_PREFIX to process the entire prototype up to the end position.
+ * Use MODE_TYPE to process the return type when str starts on the return type char.
+ *
+ *  Return:
+ *     success: buf
+ *     error  : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+       int rlen = 0;
+       int array = 0;
+       int narg = 0;
+       const char *q;
+
+       if (!end)
+               end = str + strlen(str);
+
+       for (q = str; q != end; q++) {
+
+               if (rlen == (maxlen - 1))
+                       break;
+
+               switch (*q) {
+               case 'L':
+                       if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+                               if (mode == MODE_CTYPE) {
+                                       if (narg)
+                                               rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+                                       narg++;
+                               }
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+                               if (mode == MODE_PREFIX)
+                                       mode = MODE_CLASS;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case 'B':
+               case 'C':
+               case 'D':
+               case 'F':
+               case 'I':
+               case 'J':
+               case 'S':
+               case 'Z':
+                       if (mode == MODE_TYPE) {
+                               if (narg)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+                               while (array--)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+                               array = 0;
+                               narg++;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case 'V':
+                       if (mode == MODE_TYPE) {
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+                               while (array--)
+                                       rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+                               array = 0;
+                       } else
+                               buf[rlen++] = *q;
+                       break;
+               case '[':
+                       if (mode != MODE_TYPE)
+                               goto error;
+                       array++;
+                       break;
+               case '(':
+                       if (mode != MODE_FUNC)
+                               goto error;
+                       buf[rlen++] = *q;
+                       mode = MODE_TYPE;
+                       break;
+               case ')':
+                       if (mode != MODE_TYPE)
+                               goto error;
+                       buf[rlen++] = *q;
+                       narg = 0;
+                       break;
+               case ';':
+                       if (mode != MODE_CLASS && mode != MODE_CTYPE)
+                               goto error;
+                       /* safe because at least one other char to process */
+                       if (isalpha(*(q + 1)))
+                               rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+                       if (mode == MODE_CLASS)
+                               mode = MODE_FUNC;
+                       else if (mode == MODE_CTYPE)
+                               mode = MODE_TYPE;
+                       break;
+               case '/':
+                       if (mode != MODE_CLASS && mode != MODE_CTYPE)
+                               goto error;
+                       rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+                       break;
+               default :
+                       buf[rlen++] = *q;
+               }
+       }
+       buf[rlen] = '\0';
+       return buf;
+error:
+       return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ *     str: string to parse. String is not modified
+ *    flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ *     if input can be demangled, then a newly allocated string is returned.
+ *     if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+       char *buf, *ptr;
+       char *p;
+       size_t len, l1 = 0;
+
+       if (!str)
+               return NULL;
+
+       /* find start of return type */
+       p = strrchr(str, ')');
+       if (!p)
+               return NULL;
+
+       /*
+        * expansion factor estimated at 3x
+        */
+       len = strlen(str) * 3 + 1;
+       buf = malloc(len);
+       if (!buf)
+               return NULL;
+
+       buf[0] = '\0';
+       if (!(flags & JAVA_DEMANGLE_NORET)) {
+               /*
+                * get return type first
+                */
+               ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+               if (!ptr)
+                       goto error;
+
+               /* add space between return type and function prototype */
+               l1 = strlen(buf);
+               buf[l1++] = ' ';
+       }
+
+       /* process function up to return type */
+       ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+       if (!ptr)
+               goto error;
+
+       return buf;
+error:
+       free(buf);
+       return NULL;
+}
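
A hypothetical caller of the new demangler, assuming an openJDK-style mangled name; both the input string and the exact output wording are illustrative.

/* Usage sketch for java_demangle_sym(); names and output are illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include "demangle-java.h"

int main(void)
{
	/* openJDK-style mangled name: class signature + method + descriptor */
	const char *mangled = "Ljava/lang/String;equals(Ljava/lang/Object;)Z";
	char *demangled = java_demangle_sym(mangled, 0);

	if (demangled) {
		/* expected to read roughly like:
		 * bool class java.lang.String.equals(class java.lang.Object) */
		printf("%s\n", demangled);
		free(demangled);	/* caller owns the returned string */
	}
	return 0;
}
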
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644 (file)
index 0000000..a981c1f
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET    0x1 /* do not process return type */
+
+char * java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
index e8e9a9dbf5e395a20d589c80b253c2d869b927c1..8e6395439ca0830cefaaa5b6dbe905ae2af93011 100644 (file)
@@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso,
                        debuglink--;
                if (*debuglink == '/')
                        debuglink++;
+
+               ret = -1;
+               if (!is_regular_file(filename))
+                       break;
+
                ret = filename__read_debuglink(filename, debuglink,
                                               size - (debuglink - filename));
                }
index 7dd5939dea2e58385bd374a3e59aecf766e799c1..49a11d9d8b8f050efa48715cfba9e54731f63eef 100644 (file)
@@ -6,6 +6,8 @@ struct perf_env perf_env;
 
 void perf_env__exit(struct perf_env *env)
 {
+       int i;
+
        zfree(&env->hostname);
        zfree(&env->os_release);
        zfree(&env->version);
@@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env)
        zfree(&env->numa_nodes);
        zfree(&env->pmu_mappings);
        zfree(&env->cpu);
+
+       for (i = 0; i < env->caches_cnt; i++)
+               cpu_cache_level__free(&env->caches[i]);
+       zfree(&env->caches);
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
@@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
        env->nr_cpus_avail = nr_cpus;
        return 0;
 }
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+       free(cache->type);
+       free(cache->map);
+       free(cache->size);
+}
index 0132b9557c02b56f7f31e3e51981d55b0d1027bd..56cffb60a0b42e456a11fb2bd489d74a69893b0c 100644 (file)
@@ -1,11 +1,23 @@
 #ifndef __PERF_ENV_H
 #define __PERF_ENV_H
 
+#include <linux/types.h>
+
 struct cpu_topology_map {
        int     socket_id;
        int     core_id;
 };
 
+struct cpu_cache_level {
+       u32     level;
+       u32     line_size;
+       u32     sets;
+       u32     ways;
+       char    *type;
+       char    *size;
+       char    *map;
+};
+
 struct perf_env {
        char                    *hostname;
        char                    *os_release;
@@ -31,6 +43,8 @@ struct perf_env {
        char                    *numa_nodes;
        char                    *pmu_mappings;
        struct cpu_topology_map *cpu;
+       struct cpu_cache_level  *caches;
+       int                      caches_cnt;
 };
 
 extern struct perf_env perf_env;
@@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
 
 int perf_env__read_cpu_topology_map(struct perf_env *env);
 
+void cpu_cache_level__free(struct cpu_cache_level *cache);
 #endif /* __PERF_ENV_H */
index 85155e91b61ba9b70feaf867b109270df498b9b8..7bad5c3fa7b7175862f5e3bdf38ffca0e1b14ee1 100644 (file)
@@ -282,7 +282,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                strcpy(execname, "");
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+               n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
                       &event->mmap2.start, &event->mmap2.len, prot,
                       &event->mmap2.pgoff, &event->mmap2.maj,
                       &event->mmap2.min,
index d81f13de24769963f6c32c3f809a714e6868dc5e..86a03836a83fc3f8ee8648d83317b8d91e3f48d8 100644 (file)
@@ -1181,12 +1181,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
         */
        if (cpus != evlist->cpus) {
                cpu_map__put(evlist->cpus);
-               evlist->cpus = cpus;
+               evlist->cpus = cpu_map__get(cpus);
        }
 
        if (threads != evlist->threads) {
                thread_map__put(evlist->threads);
-               evlist->threads = threads;
+               evlist->threads = thread_map__get(threads);
        }
 
        perf_evlist__propagate_maps(evlist);
@@ -1223,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
        int err = 0;
 
        evlist__for_each(evlist, evsel) {
+               if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+                       continue;
+
                err = perf_evsel__set_filter(evsel, filter);
                if (err)
                        break;
@@ -1624,7 +1627,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
        return printed + fprintf(fp, "\n");
 }
 
-int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
                               int err, char *buf, size_t size)
 {
        int printed, value;
@@ -1652,7 +1655,25 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused,
                                    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
                                    "Hint:\tThe current value is %d.", value);
                break;
+       case EINVAL: {
+               struct perf_evsel *first = perf_evlist__first(evlist);
+               int max_freq;
+
+               if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+                       goto out_default;
+
+               if (first->attr.sample_freq < (u64)max_freq)
+                       goto out_default;
+
+               printed = scnprintf(buf, size,
+                                   "Error:\t%s.\n"
+                                   "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+                                   "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+                                   emsg, max_freq, first->attr.sample_freq);
+               break;
+       }
        default:
+out_default:
                scnprintf(buf, size, "%s", emsg);
                break;
        }
@@ -1723,3 +1744,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
 
        tracking_evsel->tracking = true;
 }
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+                              const char *str)
+{
+       struct perf_evsel *evsel;
+
+       evlist__for_each(evlist, evsel) {
+               if (!evsel->name)
+                       continue;
+               if (strcmp(str, evsel->name) == 0)
+                       return evsel;
+       }
+
+       return NULL;
+}
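
The perf_evlist__set_maps() hunk above fixes ownership by taking a reference before storing a shared map. A minimal sketch of that refcounting rule follows, with illustrative types rather than perf's cpu_map/thread_map.

/* Sketch: take a reference before storing a shared pointer,
 * drop the one being replaced. All names here are illustrative. */
#include <stdlib.h>

struct shared_map {
	int refcnt;
	/* ... payload ... */
};

static struct shared_map *map_get(struct shared_map *m)
{
	if (m)
		m->refcnt++;
	return m;
}

static void map_put(struct shared_map *m)
{
	if (m && --m->refcnt == 0)
		free(m);
}

struct holder { struct shared_map *map; };

static void holder_set_map(struct holder *h, struct shared_map *m)
{
	if (m != h->map) {
		map_put(h->map);	/* drop the old reference ... */
		h->map = map_get(m);	/* ... and own a new one */
	}
}

int main(void)
{
	struct holder h = { .map = NULL };
	struct shared_map *m = calloc(1, sizeof(*m));

	if (!m)
		return 1;
	m->refcnt = 1;			/* creator's reference */
	holder_set_map(&h, m);		/* holder takes its own reference */
	map_put(m);			/* creator drops its reference */
	holder_set_map(&h, NULL);	/* holder releases, map is freed */
	return 0;
}
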
index 7c4d9a2067769b0e3de2d96a11e6f7dd7d62106a..a0d15221db6e878412126f1ec5de08cb030f132f 100644 (file)
@@ -294,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
                                     struct perf_evsel *tracking_evsel);
 
 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 #endif /* __PERF_EVLIST_H */
index cdbaf9b51e428ad4537a38ded24ee07bec54e90c..0902fe418754ec0c3149d3daeafc122fee65e243 100644 (file)
@@ -225,6 +225,11 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
        if (evsel != NULL)
                perf_evsel__init(evsel, attr, idx);
 
+       if (perf_evsel__is_bpf_output(evsel)) {
+               evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+               evsel->attr.sample_period = 1;
+       }
+
        return evsel;
 }
 
@@ -898,6 +903,16 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
        if (evsel->precise_max)
                perf_event_attr__set_max_precise_ip(attr);
 
+       if (opts->all_user) {
+               attr->exclude_kernel = 1;
+               attr->exclude_user   = 0;
+       }
+
+       if (opts->all_kernel) {
+               attr->exclude_kernel = 0;
+               attr->exclude_user   = 1;
+       }
+
        /*
         * Apply event specific term settings,
         * it overloads any global configuration.
@@ -2362,12 +2377,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
        case EPERM:
        case EACCES:
                return scnprintf(msg, size,
-                "You may not have permission to collect %sstats.\n"
-                "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n"
-                " -1 - Not paranoid at all\n"
-                "  0 - Disallow raw tracepoint access for unpriv\n"
-                "  1 - Disallow cpu events for unpriv\n"
-                "  2 - Disallow kernel profiling for unpriv",
+                "You may not have permission to collect %sstats.\n\n"
+                "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+                "which controls use of the performance events system by\n"
+                "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+                "The default value is 1:\n\n"
+                "  -1: Allow use of (almost) all events by all users\n"
+                ">= 0: Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
+                ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+                ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN",
                                 target->system_wide ? "system-wide " : "");
        case ENOENT:
                return scnprintf(msg, size, "The %s event is not supported.",
index 8e75434bd01c671a8ed2e0c0b03a139212d7001c..501ea6e565f13a4a4817947957c79f15d805d130 100644 (file)
@@ -93,10 +93,8 @@ struct perf_evsel {
        const char              *unit;
        struct event_format     *tp_format;
        off_t                   id_offset;
-       union {
-               void            *priv;
-               u64             db_id;
-       };
+       void                    *priv;
+       u64                     db_id;
        struct cgroup_sel       *cgrp;
        void                    *handler;
        struct cpu_map          *cpus;
@@ -364,6 +362,14 @@ static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel)
 #undef FUNCTION_EVENT
 }
 
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+       struct perf_event_attr *attr = &evsel->attr;
+
+       return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
+               (attr->type == PERF_TYPE_SOFTWARE);
+}
+
 struct perf_attr_details {
        bool freq;
        bool verbose;
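
The evsel changes above force PERF_SAMPLE_RAW and a sample period of 1 for BPF output events. A small sketch of that attribute setup, assuming only the perf_event_attr UAPI, is shown below; such an attr would then be handed to perf_event_open().

/* Sketch: attribute setup for a BPF output event (software event
 * delivering raw sample data, one sample per event). */
#include <string.h>
#include <linux/perf_event.h>

static void setup_bpf_output_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_SOFTWARE;
	attr->config = PERF_COUNT_SW_BPF_OUTPUT;
	/* the payload arrives as raw sample data, one sample per event */
	attr->sample_type = PERF_SAMPLE_RAW;
	attr->sample_period = 1;
	attr->wakeup_events = 1;
}
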
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644 (file)
index 0000000..c1ef805
--- /dev/null
@@ -0,0 +1,449 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ *     Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA    /* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+  unsigned int namesz;  /* Size of entry's owner string */
+  unsigned int descsz;  /* Size of the note descriptor */
+  unsigned int type;    /* Interpretation of the descriptor */
+  char         name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+       char *output;
+       int fd;
+};
+
+static char shd_string_table[] = {
+       0,
+       '.', 't', 'e', 'x', 't', 0,                     /*  1 */
+       '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /*  7 */
+       '.', 's', 'y', 'm', 't', 'a', 'b', 0,           /* 17 */
+       '.', 's', 't', 'r', 't', 'a', 'b', 0,           /* 25 */
+       '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+       '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+};
+
+static struct buildid_note {
+       Elf_Note desc;          /* descsz: size of build-id, must be multiple of 4 */
+       char     name[4];       /* GNU\0 */
+       char     build_id[20];
+} bnote;
+
+static Elf_Sym symtab[]={
+       /* symbol 0 MUST be the undefined symbol */
+       { .st_name  = 0, /* index in sym_string table */
+         .st_info  = ELF_ST_TYPE(STT_NOTYPE),
+         .st_shndx = 0, /* for now */
+         .st_value = 0x0,
+         .st_other = ELF_ST_VIS(STV_DEFAULT),
+         .st_size  = 0,
+       },
+       { .st_name  = 1, /* index in sym_string table */
+         .st_info  = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+         .st_shndx = 1,
+         .st_value = 0, /* for now */
+         .st_other = ELF_ST_VIS(STV_DEFAULT),
+         .st_size  = 0, /* for now */
+       }
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+            unsigned long load_addr __maybe_unused,
+            const void *code __maybe_unused,
+            size_t csize __maybe_unused)
+{
+       int fd;
+       size_t sz = sizeof(note->build_id);
+       ssize_t sret;
+
+       fd = open("/dev/urandom", O_RDONLY);
+       if (fd == -1)
+               err(1, "cannot access /dev/urandom for build-id");
+
+       sret = read(fd, note->build_id, sz);
+
+       close(fd);
+
+       if (sret != (ssize_t)sz)
+               memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+            unsigned long load_addr __maybe_unused,
+            const void *code,
+            size_t csize)
+{
+       if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+               errx(1, "build_id too small for SHA1");
+
+       SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+       MD5_CTX context;
+
+       if (sizeof(note->build_id) < 16)
+               errx(1, "build_id too small for MD5");
+
+       MD5_Init(&context);
+       MD5_Update(&context, &load_addr, sizeof(load_addr));
+       MD5_Update(&context, code, csize);
+       MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+             const void *code, int csize,
+             void *debug, int nr_debug_entries)
+{
+       Elf *e;
+       Elf_Data *d;
+       Elf_Scn *scn;
+       Elf_Ehdr *ehdr;
+       Elf_Shdr *shdr;
+       char *strsym = NULL;
+       int symlen;
+       int retval = -1;
+
+       if (elf_version(EV_CURRENT) == EV_NONE) {
+               warnx("ELF initialization failed");
+               return -1;
+       }
+
+       e = elf_begin(fd, ELF_C_WRITE, NULL);
+       if (!e) {
+               warnx("elf_begin failed");
+               goto error;
+       }
+
+       /*
+        * setup ELF header
+        */
+       ehdr = elf_newehdr(e);
+       if (!ehdr) {
+               warnx("cannot get ehdr");
+               goto error;
+       }
+
+       ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+       ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+       ehdr->e_machine = GEN_ELF_ARCH;
+       ehdr->e_type = ET_DYN;
+       ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+       ehdr->e_version = EV_CURRENT;
+       ehdr->e_shstrndx= 2; /* shdr index for section name */
+
+       /*
+        * setup text section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 16;
+       d->d_off = 0LL;
+       d->d_buf = (void *)code;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = csize;
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 1;
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+       shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup section headers string table
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = shd_string_table;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = sizeof(shd_string_table);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+       shdr->sh_type = SHT_STRTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup symtab section
+        */
+       symtab[1].st_size  = csize;
+       symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 8;
+       d->d_off = 0LL;
+       d->d_buf = symtab;
+       d->d_type = ELF_T_SYM;
+       d->d_size = sizeof(symtab);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+       shdr->sh_type = SHT_SYMTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = sizeof(Elf_Sym);
+       shdr->sh_link = 4; /* index of .strtab section */
+
+       /*
+        * setup symbols string table
+        * 2 = leading NUL for the empty first entry + trailing NUL after the symbol name
+        */
+       symlen = 2 + strlen(sym);
+       strsym = calloc(1, symlen);
+       if (!strsym) {
+               warnx("cannot allocate strsym");
+               goto error;
+       }
+       strcpy(strsym + 1, sym);
+
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = strsym;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = symlen;
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 25; /* offset in shd_string_table */
+       shdr->sh_type = SHT_STRTAB;
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup build-id section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               goto error;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               goto error;
+       }
+
+       /*
+        * build-id generation
+        */
+       gen_build_id(&bnote, load_addr, code, csize);
+       bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+       bnote.desc.descsz = sizeof(bnote.build_id);
+       bnote.desc.type   = NT_GNU_BUILD_ID;
+       strcpy(bnote.name, "GNU");
+
+       d->d_align = 4;
+       d->d_off = 0LL;
+       d->d_buf = &bnote;
+       d->d_type = ELF_T_BYTE;
+       d->d_size = sizeof(bnote);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               goto error;
+       }
+
+       shdr->sh_name = 33; /* offset in shd_string_table */
+       shdr->sh_type = SHT_NOTE;
+       shdr->sh_addr = 0x0;
+       shdr->sh_flags = SHF_ALLOC;
+       shdr->sh_size = sizeof(bnote);
+       shdr->sh_entsize = 0;
+
+       if (debug && nr_debug_entries) {
+               retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+               if (retval)
+                       goto error;
+       } else {
+               if (elf_update(e, ELF_C_WRITE) < 0) {
+                       warnx("elf_update 4 failed");
+                       goto error;
+               }
+       }
+
+       retval = 0;
+error:
+       (void)elf_end(e);
+
+       free(strsym);
+
+
+       return retval;
+}
+
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+    0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+    0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+    0xCD, 0x80            /* int $0x80 */
+};
+
+static struct options options;
+
+int main(int argc, char **argv)
+{
+       int c, fd, ret;
+
+       while ((c = getopt(argc, argv, "o:h")) != -1) {
+               switch (c) {
+               case 'o':
+                       options.output = optarg;
+                       break;
+               case 'h':
+                       printf("Usage: genelf -o output_file [-h]\n");
+                       return 0;
+               default:
+                       errx(1, "unknown option");
+               }
+       }
+
+       fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+       if (fd == -1)
+               err(1, "cannot create file %s", options.output);
+
+       ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code), NULL, 0);
+       close(fd);
+
+       if (ret != 0)
+               unlink(options.output);
+
+       return ret;
+}
+#endif
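
A hypothetical user of jit_write_elf() as declared in genelf.h: wrapping one JITed code buffer into a small ELF image so perf can symbolize it. The output path and symbol name are made up for illustration.

/* Sketch: emit a minimal ELF DSO for one JITed function. */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <libelf.h>
#include "genelf.h"

static int emit_jit_dso(const void *code, int csize, uint64_t vaddr)
{
	int fd, ret;

	fd = open("/tmp/jitted-42-1.so", O_CREAT | O_TRUNC | O_RDWR, 0644);
	if (fd < 0)
		return -1;

	/* no line-number info: pass debug=NULL, nr_debug_entries=0 */
	ret = jit_write_elf(fd, vaddr, "jitted_method", code, csize, NULL, 0);
	close(fd);
	return ret;
}
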
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644 (file)
index 0000000..45bf9c6
--- /dev/null
@@ -0,0 +1,67 @@
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+                        const void *code, int csize,
+                        void *debug, int nr_debug_entries);
+/* genelf_debug.c */
+extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
+                             void *debug, int nr_debug_entries);
+
+#if   defined(__arm__)
+#define GEN_ELF_ARCH   EM_ARM
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH   EM_AARCH64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH   EM_X86_64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH   EM_386
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS32
+#elif defined(__ppcle__)
+#define GEN_ELF_ARCH   EM_PPC
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH   EM_PPC64
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__powerpcle__)
+#define GEN_ELF_ARCH   EM_PPC64
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#define GEN_ELF_CLASS  ELFCLASS64
+#else
+#error "unsupported architecture"
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr    elf64_newehdr
+#define elf_getshdr    elf64_getshdr
+#define Elf_Ehdr       Elf64_Ehdr
+#define Elf_Shdr       Elf64_Shdr
+#define Elf_Sym                Elf64_Sym
+#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a)  ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr    elf32_newehdr
+#define elf_getshdr    elf32_getshdr
+#define Elf_Ehdr       Elf32_Ehdr
+#define Elf_Shdr       Elf32_Shdr
+#define Elf_Sym                Elf32_Sym
+#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a)  ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644 (file)
index 0000000..5980f7d
--- /dev/null
@@ -0,0 +1,610 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ *     Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE    (4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t  sword;
+typedef int16_t  shalf;
+typedef uint8_t  ubyte;
+typedef int8_t   sbyte;
+
+struct buffer_ext {
+       size_t cur_pos;
+       size_t max_sz;
+       void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+       size_t i;
+       warnx("DUMP for %s", msg);
+       for (i = 0 ; i < be->cur_pos; i++)
+               warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+       void *tmp;
+       size_t be_sz = be->max_sz;
+
+retry:
+       if ((be->cur_pos + sz) < be_sz) {
+               memcpy(be->data + be->cur_pos, addr, sz);
+               be->cur_pos += sz;
+               return 0;
+       }
+
+       if (!be_sz)
+               be_sz = BUFFER_EXT_DFL_SIZE;
+       else
+               be_sz <<= 1;
+
+       tmp = realloc(be->data, be_sz);
+       if (!tmp)
+               return -1;
+
+       be->data   = tmp;
+       be->max_sz = be_sz;
+
+       goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+       be->data = NULL;
+       be->cur_pos = 0;
+       be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+       return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+       return be->data;
+}
+
+struct debug_line_header {
+       // Not counting this field
+       uword total_length;
+       // version number (2 currently)
+       uhalf version;
+       // relative offset from next field to
+       // program statement
+       uword prolog_length;
+       ubyte minimum_instruction_length;
+       ubyte default_is_stmt;
+       // line_base - see DWARF 2 specs
+       sbyte line_base;
+       // line_range - see DWARF 2 specs
+       ubyte line_range;
+       // number of opcode + 1
+       ubyte opcode_base;
+       /* follow the array of opcode args nr: ubytes [nr_opcode_base] */
+       /* followed by the search directories: zero-terminated strings,
+        * terminated by an empty string.
+        */
+       /* followed by an array of { filename, LEB128, LEB128, LEB128 }: the
+        * directory index (0 means current directory), then mtime and
+        * filesize; the last entry is followed by an empty string.
+        */
+       /* follow the first program statement */
+} __attribute__((packed));
+
+/* The DWARF 2 spec describes only one compilation unit header, while
+ * binutils can handle two flavours of DWARF 2: 32 and 64 bits. This is not
+ * related to the target architecture; a 32-bit ELF can hold more than 4 GB
+ * of debug information. For now we handle only the DWARF 2 32-bit comp unit,
+ * which only becomes a problem if we generate more than 4 GB of debug
+ * information.
+ */
+struct compilation_unit_header {
+       uword total_length;
+       uhalf version;
+       uword debug_abbrev_offset;
+       ubyte pointer_size;
+} __attribute__((packed));
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+       .total_length = -1,
+       .version = 2,
+       .prolog_length = -1,
+       .minimum_instruction_length = 1,        /* could be better when min instruction size != 1 */
+       .default_is_stmt = 1,   /* we do not track basic blocks */
+       .line_base = -5,        /* sensible value for line base ... */
+       .line_range = -14,     /* ... and line range are guessed statically */
+       .opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+       0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+       [DW_LNS_advance_pc]   = 1,
+       [DW_LNS_advance_line] = 1,
+       [DW_LNS_set_file] =  1,
+       [DW_LNS_set_column] = 1,
+       [DW_LNS_fixed_advance_pc] = 1,
+       [DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+       .total_length = -1,
+       .version = 2,
+       .debug_abbrev_offset = 0,     /* we reuse the same abbrev entries for all comp unit */
+       .pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+       buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+       buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+                                unsigned long data)
+{
+       do {
+               ubyte cur = data & 0x7F;
+               data >>= 7;
+               if (data)
+                       cur |= 0x80;
+               buffer_ext_add(be, &cur, 1);
+       } while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+       int more = 1;
+       int negative = data < 0;
+       int size = sizeof(long) * CHAR_BIT;
+       while (more) {
+               ubyte cur = data & 0x7F;
+               data >>= 7;
+               if (negative)
+                       data |= - (1 << (size - 7));
+               if ((data == 0 && !(cur & 0x40)) ||
+                   (data == -1l && (cur & 0x40)))
+                       more = 0;
+               else
+                       cur |= 0x80;
+               buffer_ext_add(be, &cur, 1);
+       }
+}
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+                                void *data, size_t data_len)
+{
+       buffer_ext_add(be, (char *)"", 1);
+
+       emit_unsigned_LEB128(be, data_len + 1);
+
+       buffer_ext_add(be, &opcode, 1);
+       buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+       buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext  *be,
+                              ubyte opcode, long data)
+{
+       buffer_ext_add(be, &opcode, 1);
+       emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+                                unsigned long data)
+{
+       buffer_ext_add(be, &opcode, 1);
+       emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+       emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext  *be, long delta_lineno)
+{
+       emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+       emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+       emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+                                    const char *filename)
+{
+       buffer_ext_add(be, (void *)"", 1);
+
+       /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */
+       emit_unsigned_LEB128(be, strlen(filename) + 5);
+       emit_opcode(be, DW_LNE_define_file);
+       emit_string(be, filename);
+       /* directory index 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+       /* last modification date on file 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+       /* filesize 0=do not know */
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+                                void *address)
+{
+       emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+                               unsigned int last_line,
+                               unsigned long last_vma)
+{
+       unsigned int temp;
+       unsigned long delta_addr;
+
+       /*
+        * delta from line_base
+        */
+       temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+       if (temp >= default_debug_line_header.line_range)
+               return 0;
+
+       /*
+        * delta of addresses
+        */
+       delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+       /* This alone does not guarantee the opcode stays within [0-255], but
+        * it does guarantee that adding the line delta cannot overflow the
+        * unsigned long opcode. */
+
+       if (delta_addr <= 256 / default_debug_line_header.line_range) {
+               unsigned long opcode = temp +
+                       (delta_addr * default_debug_line_header.line_range) +
+                       default_debug_line_header.opcode_base;
+
+               return opcode <= 255 ? opcode : 0;
+       }
+       return 0;
+}
+
+static void emit_lineno_info(struct buffer_ext *be,
+                            struct debug_entry *ent, size_t nr_entry,
+                            unsigned long code_addr)
+{
+       size_t i;
+
+       /*
+        * Machine state at start of a statement program
+        * address = 0
+        * file    = 1
+        * line    = 1
+        * column  = 0
+        * is_stmt = default_is_stmt as given in the debug_line_header
+        * basic block = 0
+        * end sequence = 0
+        */
+
+       /* start state of the state machine we take care of */
+       unsigned long last_vma = code_addr;
+       char const  *cur_filename = NULL;
+       unsigned long cur_file_idx = 0;
+       int last_line = 1;
+
+       emit_lne_set_address(be, (void *)code_addr);
+
+       for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+               int need_copy = 0;
+               ubyte special_opcode;
+
+               /*
+                * check if filename changed, if so add it
+                */
+               if (!cur_filename || strcmp(cur_filename, ent->name)) {
+                       emit_lne_define_filename(be, ent->name);
+                       cur_filename = ent->name;
+                       emit_set_file(be, ++cur_file_idx);
+                       need_copy = 1;
+               }
+
+               special_opcode = get_special_opcode(ent, last_line, last_vma);
+               if (special_opcode != 0) {
+                       last_line = ent->lineno;
+                       last_vma  = ent->addr;
+                       emit_opcode(be, special_opcode);
+               } else {
+                       /*
+                        * lines differ, emit line delta
+                        */
+                       if (last_line != ent->lineno) {
+                               emit_advance_lineno(be, ent->lineno - last_line);
+                               last_line = ent->lineno;
+                               need_copy = 1;
+                       }
+                       /*
+                        * addresses differ, emit address delta
+                        */
+                       if (last_vma != ent->addr) {
+                               emit_advance_pc(be, ent->addr - last_vma);
+                               last_vma = ent->addr;
+                               need_copy = 1;
+                       }
+                       /*
+                        * add new row to matrix
+                        */
+                       if (need_copy)
+                               emit_opcode(be, DW_LNS_copy);
+               }
+       }
+}
+
+static void add_debug_line(struct buffer_ext *be,
+       struct debug_entry *ent, size_t nr_entry,
+       unsigned long code_addr)
+{
+       struct debug_line_header * dbg_header;
+       size_t old_size;
+
+       old_size = buffer_ext_size(be);
+
+       buffer_ext_add(be, (void *)&default_debug_line_header,
+                sizeof(default_debug_line_header));
+
+       buffer_ext_add(be, &standard_opcode_length,  sizeof(standard_opcode_length));
+
+       // empty directory entry
+       buffer_ext_add(be, (void *)"", 1);
+
+       // empty filename directory
+       buffer_ext_add(be, (void *)"", 1);
+
+       dbg_header = buffer_ext_addr(be) + old_size;
+       dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct debug_line_header, minimum_instruction_length);
+
+       emit_lineno_info(be, ent, nr_entry, code_addr);
+
+       emit_lne_end_of_sequence(be);
+
+       dbg_header = buffer_ext_addr(be) + old_size;
+       dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+        emit_unsigned_LEB128(be, 1);
+        emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+        emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+        emit_unsigned_LEB128(be, DW_AT_stmt_list);
+        emit_unsigned_LEB128(be, DW_FORM_data4);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+        emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+                    size_t offset_debug_line)
+{
+       struct compilation_unit_header *comp_unit_header;
+       size_t old_size = buffer_ext_size(be);
+
+       buffer_ext_add(be, &default_comp_unit_header,
+                      sizeof(default_comp_unit_header));
+
+       emit_unsigned_LEB128(be, 1);
+       emit_uword(be, offset_debug_line);
+
+       comp_unit_header = buffer_ext_addr(be) + old_size;
+       comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+               offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+                      void *debug, int nr_debug_entries,
+                      struct buffer_ext *dl,
+                      struct buffer_ext *da,
+                      struct buffer_ext *di)
+{
+       struct debug_entry *ent = debug;
+       int i;
+
+       for (i = 0; i < nr_debug_entries; i++) {
+               ent->addr = ent->addr - code_addr;
+               ent = debug_entry_next(ent);
+       }
+       add_compilation_unit(di, buffer_ext_size(dl));
+       add_debug_line(dl, debug, nr_debug_entries, 0);
+       add_debug_abbrev(da);
+       if (0) buffer_ext_dump(da, "abbrev");
+
+       return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+       Elf_Data *d;
+       Elf_Scn *scn;
+       Elf_Shdr *shdr;
+       struct buffer_ext dl, di, da;
+       int ret;
+
+       buffer_ext_init(&dl);
+       buffer_ext_init(&di);
+       buffer_ext_init(&da);
+
+       ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+       if (ret)
+               return -1;
+       /*
+        * setup .debug_line section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&dl);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&dl);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 52; /* .debug_line */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup .debug_info section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&di);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&di);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 64; /* .debug_info */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * setup .debug_abbrev section
+        */
+       scn = elf_newscn(e);
+       if (!scn) {
+               warnx("cannot create section");
+               return -1;
+       }
+
+       d = elf_newdata(scn);
+       if (!d) {
+               warnx("cannot get new data");
+               return -1;
+       }
+
+       d->d_align = 1;
+       d->d_off = 0LL;
+       d->d_buf = buffer_ext_addr(&da);
+       d->d_type = ELF_T_BYTE;
+       d->d_size = buffer_ext_size(&da);
+       d->d_version = EV_CURRENT;
+
+       shdr = elf_getshdr(scn);
+       if (!shdr) {
+               warnx("cannot get section header");
+               return -1;
+       }
+
+       shdr->sh_name = 76; /* .debug_abbrev */
+       shdr->sh_type = SHT_PROGBITS;
+       shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+       shdr->sh_flags = 0;
+       shdr->sh_entsize = 0;
+
+       /*
+        * now we update the ELF image with all the sections
+        */
+       if (elf_update(e, ELF_C_WRITE) < 0) {
+               warnx("elf_update debug failed");
+               return -1;
+       }
+       return 0;
+}
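
The DWARF line program emitted above leans on LEB128 variable-length integers. Below is a standalone sketch of the unsigned encoder with a worked example: 300 encodes as the two bytes 0xAC 0x02.

/* Standalone unsigned LEB128 encoder matching emit_unsigned_LEB128() above.
 * Worked example: 300 = 0b1_0010_1100 -> bytes 0xAC 0x02. */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static size_t encode_uleb128(unsigned long value, uint8_t *out)
{
	size_t n = 0;

	do {
		uint8_t byte = value & 0x7f;	/* low 7 bits */
		value >>= 7;
		if (value)
			byte |= 0x80;		/* more bytes follow */
		out[n++] = byte;
	} while (value);

	return n;
}

int main(void)
{
	uint8_t buf[10];
	size_t i, n = encode_uleb128(300, buf);

	for (i = 0; i < n; i++)
		printf("0x%02X ", buf[i]);	/* prints: 0xAC 0x02 */
	printf("\n");
	return 0;
}
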
index f50b7235ecb6558d167a475bf4199e8b05f52143..73e38e472ecd7771695b7ac2d91829a35529f419 100644 (file)
@@ -23,6 +23,8 @@
 #include "strbuf.h"
 #include "build-id.h"
 #include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
 
 /*
  * magic2 = "PERFILE2"
@@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h,
        return err;
 }
 
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+       struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+       struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+       return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+       if (a->level != b->level)
+               return false;
+
+       if (a->line_size != b->line_size)
+               return false;
+
+       if (a->sets != b->sets)
+               return false;
+
+       if (a->ways != b->ways)
+               return false;
+
+       if (strcmp(a->type, b->type))
+               return false;
+
+       if (strcmp(a->size, b->size))
+               return false;
+
+       if (strcmp(a->map, b->map))
+               return false;
+
+       return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+       char path[PATH_MAX], file[PATH_MAX];
+       struct stat st;
+       size_t len;
+
+       scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+       scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+       if (stat(file, &st))
+               return 1;
+
+       scnprintf(file, PATH_MAX, "%s/level", path);
+       if (sysfs__read_int(file, (int *) &cache->level))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+       if (sysfs__read_int(file, (int *) &cache->line_size))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+       if (sysfs__read_int(file, (int *) &cache->sets))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+       if (sysfs__read_int(file, (int *) &cache->ways))
+               return -1;
+
+       scnprintf(file, PATH_MAX, "%s/type", path);
+       if (sysfs__read_str(file, &cache->type, &len))
+               return -1;
+
+       cache->type[len] = 0;
+       cache->type = rtrim(cache->type);
+
+       scnprintf(file, PATH_MAX, "%s/size", path);
+       if (sysfs__read_str(file, &cache->size, &len)) {
+               free(cache->type);
+               return -1;
+       }
+
+       cache->size[len] = 0;
+       cache->size = rtrim(cache->size);
+
+       scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+       if (sysfs__read_str(file, &cache->map, &len)) {
+               free(cache->size);
+               free(cache->type);
+               return -1;
+       }
+
+       cache->map[len] = 0;
+       cache->map = rtrim(cache->map);
+       return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+       fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+       u32 i, cnt = 0;
+       long ncpus;
+       u32 nr, cpu;
+       u16 level;
+
+       ncpus = sysconf(_SC_NPROCESSORS_CONF);
+       if (ncpus < 0)
+               return -1;
+
+       nr = (u32)(ncpus & UINT_MAX);
+
+       for (cpu = 0; cpu < nr; cpu++) {
+               for (level = 0; level < 10; level++) {
+                       struct cpu_cache_level c;
+                       int err;
+
+                       err = cpu_cache_level__read(&c, cpu, level);
+                       if (err < 0)
+                               return err;
+
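+                       /* err == 1: this CPU has no more cache levels */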
+                       if (err == 1)
+                               break;
+
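+                       /* skip caches already seen on another CPU (shared caches) */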
+                       for (i = 0; i < cnt; i++) {
+                               if (cpu_cache_level__cmp(&c, &caches[i]))
+                                       break;
+                       }
+
+                       if (i == cnt)
+                               caches[cnt++] = c;
+                       else
+                               cpu_cache_level__free(&c);
+
+                       if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+                               goto out;
+               }
+       }
+ out:
+       *cntp = cnt;
+       return 0;
+}
+
+#define MAX_CACHES 2000
+
+static int write_cache(int fd, struct perf_header *h __maybe_unused,
+                         struct perf_evlist *evlist __maybe_unused)
+{
+       struct cpu_cache_level caches[MAX_CACHES];
+       u32 cnt = 0, i, version = 1;
+       int ret;
+
+       ret = build_caches(caches, MAX_CACHES, &cnt);
+       if (ret)
+               goto out;
+
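+       /* order the entries by cache level before writing them out */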
+       qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+       ret = do_write(fd, &version, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       ret = do_write(fd, &cnt, sizeof(u32));
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < cnt; i++) {
+               struct cpu_cache_level *c = &caches[i];
+
+               #define _W(v)                                   \
+                       ret = do_write(fd, &c->v, sizeof(u32)); \
+                       if (ret < 0)                            \
+                               goto out;
+
+               _W(level)
+               _W(line_size)
+               _W(sets)
+               _W(ways)
+               #undef _W
+
+               #define _W(v)                                           \
+                       ret = do_write_string(fd, (const char *) c->v); \
+                       if (ret < 0)                                    \
+                               goto out;
+
+               _W(type)
+               _W(size)
+               _W(map)
+               #undef _W
+       }
+
+out:
+       for (i = 0; i < cnt; i++)
+               cpu_cache_level__free(&caches[i]);
+       return ret;
+}
+
 static int write_stat(int fd __maybe_unused,
                      struct perf_header *h __maybe_unused,
                      struct perf_evlist *evlist __maybe_unused)
@@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused,
        fprintf(fp, "# contains stat data\n");
 }
 
+static void print_cache(struct perf_header *ph __maybe_unused,
+                       int fd __maybe_unused, FILE *fp __maybe_unused)
+{
+       int i;
+
+       fprintf(fp, "# CPU cache info:\n");
+       for (i = 0; i < ph->env.caches_cnt; i++) {
+               fprintf(fp, "#  ");
+               cpu_cache_level__fprintf(fp, &ph->env.caches[i]);
+       }
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
                               FILE *fp)
 {
@@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section,
        return err;
 }
 
+static int process_cache(struct perf_file_section *section __maybe_unused,
+                        struct perf_header *ph __maybe_unused, int fd __maybe_unused,
+                        void *data __maybe_unused)
+{
+       struct cpu_cache_level *caches;
+       u32 cnt, i, version;
+
+       if (readn(fd, &version, sizeof(version)) != sizeof(version))
+               return -1;
+
+       if (ph->needs_swap)
+               version = bswap_32(version);
+
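+       /* only version 1 of the cache data layout is supported */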
+       if (version != 1)
+               return -1;
+
+       if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt))
+               return -1;
+
+       if (ph->needs_swap)
+               cnt = bswap_32(cnt);
+
+       caches = zalloc(sizeof(*caches) * cnt);
+       if (!caches)
+               return -1;
+
+       for (i = 0; i < cnt; i++) {
+               struct cpu_cache_level c;
+
+               #define _R(v)                                           \
+                       if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\
+                               goto out_free_caches;                   \
+                       if (ph->needs_swap)                             \
+                               c.v = bswap_32(c.v);                    \
+
+               _R(level)
+               _R(line_size)
+               _R(sets)
+               _R(ways)
+               #undef _R
+
+               #define _R(v)                           \
+                       c.v = do_read_string(fd, ph);   \
+                       if (!c.v)                       \
+                               goto out_free_caches;
+
+               _R(type)
+               _R(size)
+               _R(map)
+               #undef _R
+
+               caches[i] = c;
+       }
+
+       ph->env.caches = caches;
+       ph->env.caches_cnt = cnt;
+       return 0;
+out_free_caches:
+       free(caches);
+       return -1;
+}
+
 struct feature_ops {
        int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
        void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPP(HEADER_GROUP_DESC,     group_desc),
        FEAT_OPP(HEADER_AUXTRACE,       auxtrace),
        FEAT_OPA(HEADER_STAT,           stat),
+       FEAT_OPF(HEADER_CACHE,          cache),
 };
 
 struct header_print_data {
index cff9892452ee393764726a12895ad95d36f766fe..3d87ca823c0ae55591e753f6f928802e831c0e3c 100644 (file)
@@ -32,6 +32,7 @@ enum {
        HEADER_GROUP_DESC,
        HEADER_AUXTRACE,
        HEADER_STAT,
+       HEADER_CACHE,
        HEADER_LAST_FEATURE,
        HEADER_FEAT_BITS        = 256,
 };
index dc1e41c9b054b186c0d9349d97e9d0f467099ac0..43a98a4dc1e1e90c7079fba26929022ec0959a31 100644 (file)
@@ -6,7 +6,8 @@
 static int autocorrect;
 static struct cmdnames aliases;
 
-static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
+static int perf_unknown_cmd_config(const char *var, const char *value,
+                                  void *cb __maybe_unused)
 {
        if (!strcmp(var, "help.autocorrect"))
                autocorrect = perf_config_int(var,value);
@@ -14,7 +15,7 @@ static int perf_unknown_cmd_config(const char *var, const char *value, void *cb)
        if (!prefixcmp(var, "alias."))
                add_cmdname(&aliases, var + 6, strlen(var + 6));
 
-       return perf_default_config(var, value, cb);
+       return 0;
 }
 
 static int levenshtein_compare(const void *p1, const void *p2)
index 68a7612019dc3c3be315604693b68170183044d6..290b3cbf68772de883bff4fee5c2294234dbefd3 100644 (file)
@@ -179,6 +179,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
        if (h->transaction)
                hists__new_col_len(hists, HISTC_TRANSACTION,
                                   hist_entry__transaction_len());
+
+       if (h->trace_output)
+               hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
 }
 
 void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -245,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat)
        /* XXX need decay for weight too? */
 }
 
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
        u64 prev_period = he->stat.period;
@@ -260,21 +265,45 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 
        diff = prev_period - he->stat.period;
 
-       hists->stats.total_period -= diff;
-       if (!he->filtered)
-               hists->stats.total_non_filtered_period -= diff;
+       if (!he->depth) {
+               hists->stats.total_period -= diff;
+               if (!he->filtered)
+                       hists->stats.total_non_filtered_period -= diff;
+       }
+
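+       /* in hierarchy mode, also decay (and possibly delete) child entries */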
+       if (!he->leaf) {
+               struct hist_entry *child;
+               struct rb_node *node = rb_first(&he->hroot_out);
+               while (node) {
+                       child = rb_entry(node, struct hist_entry, rb_node);
+                       node = rb_next(node);
+
+                       if (hists__decay_entry(hists, child))
+                               hists__delete_entry(hists, child);
+               }
+       }
 
        return he->stat.period == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
 {
-       rb_erase(&he->rb_node, &hists->entries);
+       struct rb_root *root_in;
+       struct rb_root *root_out;
 
-       if (sort__need_collapse)
-               rb_erase(&he->rb_node_in, &hists->entries_collapsed);
-       else
-               rb_erase(&he->rb_node_in, hists->entries_in);
+       if (he->parent_he) {
+               root_in  = &he->parent_he->hroot_in;
+               root_out = &he->parent_he->hroot_out;
+       } else {
+               if (sort__need_collapse)
+                       root_in = &hists->entries_collapsed;
+               else
+                       root_in = hists->entries_in;
+               root_out = &hists->entries;
+       }
+
+       rb_erase(&he->rb_node_in, root_in);
+       rb_erase(&he->rb_node, root_out);
 
        --hists->nr_entries;
        if (!he->filtered)
@@ -393,6 +422,9 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
                }
                INIT_LIST_HEAD(&he->pairs.node);
                thread__get(he->thread);
+
+               if (!symbol_conf.report_hierarchy)
+                       he->leaf = true;
        }
 
        return he;
@@ -405,6 +437,16 @@ static u8 symbol__parent_filter(const struct symbol *parent)
        return 0;
 }
 
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+       if (!symbol_conf.use_callchain)
+               return;
+
+       he->hists->callchain_period += period;
+       if (!he->filtered)
+               he->hists->callchain_non_filtered_period += period;
+}
+
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
                                               struct hist_entry *entry,
                                               struct addr_location *al,
@@ -432,8 +474,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
                cmp = hist_entry__cmp(he, entry);
 
                if (!cmp) {
-                       if (sample_self)
+                       if (sample_self) {
                                he_stat__add_period(&he->stat, period, weight);
+                               hist_entry__add_callchain_period(he, period);
+                       }
                        if (symbol_conf.cumulate_callchain)
                                he_stat__add_period(he->stat_acc, period, weight);
 
@@ -466,6 +510,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
        if (!he)
                return NULL;
 
+       if (sample_self)
+               hist_entry__add_callchain_period(he, period);
        hists->nr_entries++;
 
        rb_link_node(&he->rb_node_in, parent, p);
@@ -951,10 +997,15 @@ out:
 int64_t
 hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
+       struct hists *hists = left->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   !perf_hpp__defined_dynamic_entry(fmt, hists))
+                       continue;
+
                cmp = fmt->cmp(fmt, left, right);
                if (cmp)
                        break;
@@ -966,10 +1017,15 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 int64_t
 hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 {
+       struct hists *hists = left->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   !perf_hpp__defined_dynamic_entry(fmt, hists))
+                       continue;
+
                cmp = fmt->collapse(fmt, left, right);
                if (cmp)
                        break;
@@ -1005,18 +1061,251 @@ void hist_entry__delete(struct hist_entry *he)
        free(he);
 }
 
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less',
+ * that would show tons of trailing spaces when a long C++ demangled method
+ * name is sampled.
+*/
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+                                  struct perf_hpp_fmt *fmt, int printed)
+{
+       if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+               const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists));
+               if (printed < width) {
+                       advance_hpp(hpp, printed);
+                       printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+               }
+       }
+
+       return printed;
+}
+
 /*
  * collapse the histogram
  */
 
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
-                                 struct rb_root *root, struct hist_entry *he)
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+                                      enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+       return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+                                               enum hist_filter type,
+                                               fmt_chk_fn check)
+{
+       struct perf_hpp_fmt *fmt;
+       bool type_match = false;
+       struct hist_entry *parent = he->parent_he;
+
+       switch (type) {
+       case HIST_FILTER__THREAD:
+               if (symbol_conf.comm_list == NULL &&
+                   symbol_conf.pid_list == NULL &&
+                   symbol_conf.tid_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__DSO:
+               if (symbol_conf.dso_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__SYMBOL:
+               if (symbol_conf.sym_list == NULL)
+                       return;
+               break;
+       case HIST_FILTER__PARENT:
+       case HIST_FILTER__GUEST:
+       case HIST_FILTER__HOST:
+       case HIST_FILTER__SOCKET:
+       default:
+               return;
+       }
+
+       /* if it's filtered by its own fmt, it has to have filter bits */
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               if (check(fmt)) {
+                       type_match = true;
+                       break;
+               }
+       }
+
+       if (type_match) {
+               /*
+                * If the filter is for the current level entry, propagate
+                * the filter marker to parents.  The marker bit was
+                * already set by default, so it only needs to be cleared
+                * for non-filtered entries.
+                */
+               if (!(he->filtered & (1 << type))) {
+                       while (parent) {
+                               parent->filtered &= ~(1 << type);
+                               parent = parent->parent_he;
+                       }
+               }
+       } else {
+               /*
+                * If the current entry doesn't have matching formats, set
+                * the filter marker for upper level entries.  It will be
+                * cleared if its lower level entries are not filtered.
+                *
+                * Lower-level entries inherit the parent's filter bit so
+                * that lower level entries of a non-filtered entry won't
+                * set the filter marker.
+                */
+               if (parent == NULL)
+                       he->filtered |= (1 << type);
+               else
+                       he->filtered |= (parent->filtered & (1 << type));
+       }
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+                                           check_thread_entry);
+
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+                                           perf_hpp__is_dso_entry);
+
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+                                           perf_hpp__is_sym_entry);
+
+       hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+                                                struct rb_root *root,
+                                                struct hist_entry *he,
+                                                struct hist_entry *parent_he,
+                                                struct perf_hpp_list *hpp_list)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter, *new;
+       struct perf_hpp_fmt *fmt;
+       int64_t cmp;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+               cmp = 0;
+               perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+                       cmp = fmt->collapse(fmt, iter, he);
+                       if (cmp)
+                               break;
+               }
+
+               if (!cmp) {
+                       he_stat__add_stat(&iter->stat, &he->stat);
+                       return iter;
+               }
+
+               if (cmp < 0)
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+
+       new = hist_entry__new(he, true);
+       if (new == NULL)
+               return NULL;
+
+       hists->nr_entries++;
+
+       /* save related format list for output */
+       new->hpp_list = hpp_list;
+       new->parent_he = parent_he;
+
+       hist_entry__apply_hierarchy_filters(new);
+
+       /* some fields are now passed to 'new' */
+       perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+               if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+                       he->trace_output = NULL;
+               else
+                       new->trace_output = NULL;
+
+               if (perf_hpp__is_srcline_entry(fmt))
+                       he->srcline = NULL;
+               else
+                       new->srcline = NULL;
+
+               if (perf_hpp__is_srcfile_entry(fmt))
+                       he->srcfile = NULL;
+               else
+                       new->srcfile = NULL;
+       }
+
+       rb_link_node(&new->rb_node_in, parent, p);
+       rb_insert_color(&new->rb_node_in, root);
+       return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+                                        struct rb_root *root,
+                                        struct hist_entry *he)
+{
+       struct perf_hpp_list_node *node;
+       struct hist_entry *new_he = NULL;
+       struct hist_entry *parent = NULL;
+       int depth = 0;
+       int ret = 0;
+
+       list_for_each_entry(node, &hists->hpp_formats, list) {
+               /* skip period (overhead) and elided columns */
+               if (node->level == 0 || node->skip)
+                       continue;
+
+               /* insert copy of 'he' for each fmt into the hierarchy */
+               new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+               if (new_he == NULL) {
+                       ret = -1;
+                       break;
+               }
+
+               root = &new_he->hroot_in;
+               new_he->depth = depth++;
+               parent = new_he;
+       }
+
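+       /* the deepest inserted entry becomes the leaf and takes over the callchain */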
+       if (new_he) {
+               new_he->leaf = true;
+
+               if (symbol_conf.use_callchain) {
+                       callchain_cursor_reset(&callchain_cursor);
+                       if (callchain_merge(&callchain_cursor,
+                                           new_he->callchain,
+                                           he->callchain) < 0)
+                               ret = -1;
+               }
+       }
+
+       /* 'he' is no longer used */
+       hist_entry__delete(he);
+
+       /* return 0 (or -1) since it already applied filters */
+       return ret;
+}
+
+int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
+                                struct hist_entry *he)
 {
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
        int64_t cmp;
 
+       if (symbol_conf.report_hierarchy)
+               return hists__hierarchy_insert_entry(hists, root, he);
+
        while (*p != NULL) {
                parent = *p;
                iter = rb_entry(parent, struct hist_entry, rb_node_in);
@@ -1024,18 +1313,21 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
                cmp = hist_entry__collapse(iter, he);
 
                if (!cmp) {
+                       int ret = 0;
+
                        he_stat__add_stat(&iter->stat, &he->stat);
                        if (symbol_conf.cumulate_callchain)
                                he_stat__add_stat(iter->stat_acc, he->stat_acc);
 
                        if (symbol_conf.use_callchain) {
                                callchain_cursor_reset(&callchain_cursor);
-                               callchain_merge(&callchain_cursor,
-                                               iter->callchain,
-                                               he->callchain);
+                               if (callchain_merge(&callchain_cursor,
+                                                   iter->callchain,
+                                                   he->callchain) < 0)
+                                       ret = -1;
                        }
                        hist_entry__delete(he);
-                       return false;
+                       return ret;
                }
 
                if (cmp < 0)
@@ -1047,7 +1339,7 @@ bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
 
        rb_link_node(&he->rb_node_in, parent, p);
        rb_insert_color(&he->rb_node_in, root);
-       return true;
+       return 1;
 }
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
@@ -1073,14 +1365,15 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
        hists__filter_entry_by_socket(hists, he);
 }
 
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 {
        struct rb_root *root;
        struct rb_node *next;
        struct hist_entry *n;
+       int ret;
 
        if (!sort__need_collapse)
-               return;
+               return 0;
 
        hists->nr_entries = 0;
 
@@ -1095,7 +1388,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
                next = rb_next(&n->rb_node_in);
 
                rb_erase(&n->rb_node_in, root);
-               if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) {
+               ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+               if (ret < 0)
+                       return -1;
+
+               if (ret) {
                        /*
                         * If it wasn't combined with one of the entries already
                         * collapsed, we need to apply the filters that may have
@@ -1106,14 +1403,16 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
                if (prog)
                        ui_progress__update(prog, 1);
        }
+       return 0;
 }
 
 static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
+       struct hists *hists = a->hists;
        struct perf_hpp_fmt *fmt;
        int64_t cmp = 0;
 
-       perf_hpp__for_each_sort_list(fmt) {
+       hists__for_each_sort_list(hists, fmt) {
                if (perf_hpp__should_skip(fmt, a->hists))
                        continue;
 
@@ -1154,6 +1453,113 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
        hists->stats.total_period += h->stat.period;
 }
 
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+       struct rb_node *node;
+       struct hist_entry *he;
+
+       node = rb_first(&hists->entries);
+
+       hists->stats.total_period = 0;
+       hists->stats.total_non_filtered_period = 0;
+
+       /*
+        * recalculate total period using top-level entries only
+        * since lower level entries only see non-filtered entries
+        * but upper level entries have sum of both entries.
+        */
+       while (node) {
+               he = rb_entry(node, struct hist_entry, rb_node);
+               node = rb_next(node);
+
+               hists->stats.total_period += he->stat.period;
+               if (!he->filtered)
+                       hists->stats.total_non_filtered_period += he->stat.period;
+       }
+}
+
+static void hierarchy_insert_output_entry(struct rb_root *root,
+                                         struct hist_entry *he)
+{
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter;
+       struct perf_hpp_fmt *fmt;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node);
+
+               if (hist_entry__sort(he, iter) > 0)
+                       p = &parent->rb_left;
+               else
+                       p = &parent->rb_right;
+       }
+
+       rb_link_node(&he->rb_node, parent, p);
+       rb_insert_color(&he->rb_node, root);
+
+       /* update column width of dynamic entry */
+       perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt))
+                       fmt->sort(fmt, he, NULL);
+       }
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+                                          struct ui_progress *prog,
+                                          struct rb_root *root_in,
+                                          struct rb_root *root_out,
+                                          u64 min_callchain_hits,
+                                          bool use_callchain)
+{
+       struct rb_node *node;
+       struct hist_entry *he;
+
+       *root_out = RB_ROOT;
+       node = rb_first(root_in);
+
+       while (node) {
+               he = rb_entry(node, struct hist_entry, rb_node_in);
+               node = rb_next(node);
+
+               hierarchy_insert_output_entry(root_out, he);
+
+               if (prog)
+                       ui_progress__update(prog, 1);
+
+               if (!he->leaf) {
+                       hists__hierarchy_output_resort(hists, prog,
+                                                      &he->hroot_in,
+                                                      &he->hroot_out,
+                                                      min_callchain_hits,
+                                                      use_callchain);
+                       hists->nr_entries++;
+                       if (!he->filtered) {
+                               hists->nr_non_filtered_entries++;
+                               hists__calc_col_len(hists, he);
+                       }
+
+                       continue;
+               }
+
+               if (!use_callchain)
+                       continue;
+
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       u64 total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
+
+                       min_callchain_hits = total * (callchain_param.min_percent / 100);
+               }
+
+               callchain_param.sort(&he->sorted_chain, he->callchain,
+                                    min_callchain_hits, &callchain_param);
+       }
+}
+
 static void __hists__insert_output_entry(struct rb_root *entries,
                                         struct hist_entry *he,
                                         u64 min_callchain_hits,
@@ -1162,10 +1568,20 @@ static void __hists__insert_output_entry(struct rb_root *entries,
        struct rb_node **p = &entries->rb_node;
        struct rb_node *parent = NULL;
        struct hist_entry *iter;
+       struct perf_hpp_fmt *fmt;
+
+       if (use_callchain) {
+               if (callchain_param.mode == CHAIN_GRAPH_REL) {
+                       u64 total = he->stat.period;
+
+                       if (symbol_conf.cumulate_callchain)
+                               total = he->stat_acc->period;
 
-       if (use_callchain)
+                       min_callchain_hits = total * (callchain_param.min_percent / 100);
+               }
                callchain_param.sort(&he->sorted_chain, he->callchain,
                                      min_callchain_hits, &callchain_param);
+       }
 
        while (*p != NULL) {
                parent = *p;
@@ -1179,23 +1595,41 @@ static void __hists__insert_output_entry(struct rb_root *entries,
 
        rb_link_node(&he->rb_node, parent, p);
        rb_insert_color(&he->rb_node, entries);
+
+       perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+               if (perf_hpp__is_dynamic_entry(fmt) &&
+                   perf_hpp__defined_dynamic_entry(fmt, he->hists))
+                       fmt->sort(fmt, he, NULL);  /* update column width */
+       }
 }
 
-void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+                         bool use_callchain)
 {
        struct rb_root *root;
        struct rb_node *next;
        struct hist_entry *n;
+       u64 callchain_total;
        u64 min_callchain_hits;
-       struct perf_evsel *evsel = hists_to_evsel(hists);
-       bool use_callchain;
 
-       if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
-               use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
-       else
-               use_callchain = symbol_conf.use_callchain;
+       callchain_total = hists->callchain_period;
+       if (symbol_conf.filter_relative)
+               callchain_total = hists->callchain_non_filtered_period;
 
-       min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100);
+       min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+       hists__reset_stats(hists);
+       hists__reset_col_len(hists);
+
+       if (symbol_conf.report_hierarchy) {
+               hists__hierarchy_output_resort(hists, prog,
+                                              &hists->entries_collapsed,
+                                              &hists->entries,
+                                              min_callchain_hits,
+                                              use_callchain);
+               hierarchy_recalc_total_periods(hists);
+               return;
+       }
 
        if (sort__need_collapse)
                root = &hists->entries_collapsed;
@@ -1205,9 +1639,6 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
        next = rb_first(root);
        hists->entries = RB_ROOT;
 
-       hists__reset_stats(hists);
-       hists__reset_col_len(hists);
-
        while (next) {
                n = rb_entry(next, struct hist_entry, rb_node_in);
                next = rb_next(&n->rb_node_in);
@@ -1223,15 +1654,136 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog)
        }
 }
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+       bool use_callchain;
+
+       if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+               use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN;
+       else
+               use_callchain = symbol_conf.use_callchain;
+
+       output_resort(evsel__hists(evsel), prog, use_callchain);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+       output_resort(hists, prog, symbol_conf.use_callchain);
+}
+
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+       if (he->leaf || hmd == HMD_FORCE_SIBLING)
+               return false;
+
+       if (he->unfolded || hmd == HMD_FORCE_CHILD)
+               return true;
+
+       return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       while (can_goto_child(he, HMD_NORMAL)) {
+               node = rb_last(&he->hroot_out);
+               he = rb_entry(node, struct hist_entry, rb_node);
+       }
+       return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       if (can_goto_child(he, hmd))
+               node = rb_first(&he->hroot_out);
+       else
+               node = rb_next(node);
+
+       while (node == NULL) {
+               he = he->parent_he;
+               if (he == NULL)
+                       break;
+
+               node = rb_next(&he->rb_node);
+       }
+       return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+       struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+       node = rb_prev(node);
+       if (node)
+               return rb_hierarchy_last(node);
+
+       he = he->parent_he;
+       if (he == NULL)
+               return NULL;
+
+       return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+       struct rb_node *node;
+       struct hist_entry *child;
+       float percent;
+
+       if (he->leaf)
+               return false;
+
+       node = rb_first(&he->hroot_out);
+       child = rb_entry(node, struct hist_entry, rb_node);
+
+       while (node && child->filtered) {
+               node = rb_next(node);
+               child = rb_entry(node, struct hist_entry, rb_node);
+       }
+
+       if (node)
+               percent = hist_entry__get_percent_limit(child);
+       else
+               percent = 0;
+
+       return node && percent >= limit;
+}
+
 static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
                                       enum hist_filter filter)
 {
        h->filtered &= ~(1 << filter);
+
+       if (symbol_conf.report_hierarchy) {
+               struct hist_entry *parent = h->parent_he;
+
+               while (parent) {
+                       he_stat__add_stat(&parent->stat, &h->stat);
+
+                       parent->filtered &= ~(1 << filter);
+
+                       if (parent->filtered)
+                               goto next;
+
+                       /* force fold unfiltered entry for simplicity */
+                       parent->unfolded = false;
+                       parent->has_no_entry = false;
+                       parent->row_offset = 0;
+                       parent->nr_rows = 0;
+next:
+                       parent = parent->parent_he;
+               }
+       }
+
        if (h->filtered)
                return;
 
        /* force fold unfiltered entry for simplicity */
        h->unfolded = false;
+       h->has_no_entry = false;
        h->row_offset = 0;
        h->nr_rows = 0;
 
@@ -1254,28 +1806,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_dso(struct hists *hists)
-{
-       struct rb_node *nd;
-
-       hists->stats.nr_non_filtered_samples = 0;
-
-       hists__reset_filter_stats(hists);
-       hists__reset_col_len(hists);
-
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-               if (symbol_conf.exclude_other && !h->parent)
-                       continue;
-
-               if (hists__filter_entry_by_dso(hists, h))
-                       continue;
-
-               hists__remove_entry_filter(hists, h, HIST_FILTER__DSO);
-       }
-}
-
 static bool hists__filter_entry_by_thread(struct hists *hists,
                                          struct hist_entry *he)
 {
@@ -1288,25 +1818,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_thread(struct hists *hists)
-{
-       struct rb_node *nd;
-
-       hists->stats.nr_non_filtered_samples = 0;
-
-       hists__reset_filter_stats(hists);
-       hists__reset_col_len(hists);
-
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
-               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
-
-               if (hists__filter_entry_by_thread(hists, h))
-                       continue;
-
-               hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD);
-       }
-}
-
 static bool hists__filter_entry_by_symbol(struct hists *hists,
                                          struct hist_entry *he)
 {
@@ -1320,7 +1831,21 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
        return false;
 }
 
-void hists__filter_by_symbol(struct hists *hists)
+static bool hists__filter_entry_by_socket(struct hists *hists,
+                                         struct hist_entry *he)
+{
+       if ((hists->socket_filter > -1) &&
+           (he->socket != hists->socket_filter)) {
+               he->filtered |= (1 << HIST_FILTER__SOCKET);
+               return true;
+       }
+
+       return false;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
 {
        struct rb_node *nd;
 
@@ -1332,42 +1857,155 @@ void hists__filter_by_symbol(struct hists *hists)
        for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
 
-               if (hists__filter_entry_by_symbol(hists, h))
+               if (filter(hists, h))
                        continue;
 
-               hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL);
+               hists__remove_entry_filter(hists, h, type);
        }
 }
 
-static bool hists__filter_entry_by_socket(struct hists *hists,
-                                         struct hist_entry *he)
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
 {
-       if ((hists->socket_filter > -1) &&
-           (he->socket != hists->socket_filter)) {
-               he->filtered |= (1 << HIST_FILTER__SOCKET);
-               return true;
+       struct rb_node **p = &root->rb_node;
+       struct rb_node *parent = NULL;
+       struct hist_entry *iter;
+       struct rb_root new_root = RB_ROOT;
+       struct rb_node *nd;
+
+       while (*p != NULL) {
+               parent = *p;
+               iter = rb_entry(parent, struct hist_entry, rb_node);
+
+               if (hist_entry__sort(he, iter) > 0)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
        }
 
-       return false;
+       rb_link_node(&he->rb_node, parent, p);
+       rb_insert_color(&he->rb_node, root);
+
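+       /* recursively re-sort the children of unfiltered non-leaf entries */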
+       if (he->leaf || he->filtered)
+               return;
+
+       nd = rb_first(&he->hroot_out);
+       while (nd) {
+               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+               nd = rb_next(nd);
+               rb_erase(&h->rb_node, &he->hroot_out);
+
+               resort_filtered_entry(&new_root, h);
+       }
+
+       he->hroot_out = new_root;
 }
 
-void hists__filter_by_socket(struct hists *hists)
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
 {
        struct rb_node *nd;
+       struct rb_root new_root = RB_ROOT;
 
        hists->stats.nr_non_filtered_samples = 0;
 
        hists__reset_filter_stats(hists);
        hists__reset_col_len(hists);
 
-       for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+       nd = rb_first(&hists->entries);
+       while (nd) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+               int ret;
 
-               if (hists__filter_entry_by_socket(hists, h))
-                       continue;
+               ret = hist_entry__filter(h, type, arg);
 
-               hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET);
+               /*
+                * case 1. non-matching type
+                * zero out the period, set filter marker and move to child
+                */
+               if (ret < 0) {
+                       memset(&h->stat, 0, sizeof(h->stat));
+                       h->filtered |= (1 << type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+               }
+               /*
+                * case 2. matched type (filter out)
+                * set filter marker and move to next
+                */
+               else if (ret == 1) {
+                       h->filtered |= (1 << type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+               }
+               /*
+                * case 3. ok (not filtered)
+                * add period to hists and parents, erase the filter marker
+                * and move to next sibling
+                */
+               else {
+                       hists__remove_entry_filter(hists, h, type);
+
+                       nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+               }
+       }
+
+       hierarchy_recalc_total_periods(hists);
+
+       /*
+        * resort the output after applying a new filter, since a filter in a
+        * lower hierarchy level can change periods in an upper hierarchy level.
+        */
+       nd = rb_first(&hists->entries);
+       while (nd) {
+               struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+               nd = rb_next(nd);
+               rb_erase(&h->rb_node, &hists->entries);
+
+               resort_filtered_entry(&new_root, h);
        }
+
+       hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+                                       hists->thread_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__THREAD,
+                                     hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+                                       hists->dso_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__DSO,
+                                     hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+                                       hists->symbol_filter_str);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+                                     hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+                                       &hists->socket_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+                                     hists__filter_entry_by_socket);
 }
 
 void events_stats__inc(struct events_stats *stats, u32 type)
@@ -1585,7 +2223,7 @@ int perf_hist_config(const char *var, const char *value)
        return 0;
 }
 
-int __hists__init(struct hists *hists)
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
 {
        memset(hists, 0, sizeof(*hists));
        hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
@@ -1594,6 +2232,8 @@ int __hists__init(struct hists *hists)
        hists->entries = RB_ROOT;
        pthread_mutex_init(&hists->lock, NULL);
        hists->socket_filter = -1;
+       hists->hpp_list = hpp_list;
+       INIT_LIST_HEAD(&hists->hpp_formats);
        return 0;
 }
 
@@ -1622,15 +2262,26 @@ static void hists__delete_all_entries(struct hists *hists)
 static void hists_evsel__exit(struct perf_evsel *evsel)
 {
        struct hists *hists = evsel__hists(evsel);
+       struct perf_hpp_fmt *fmt, *pos;
+       struct perf_hpp_list_node *node, *tmp;
 
        hists__delete_all_entries(hists);
+
+       list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+               perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+                       list_del(&fmt->list);
+                       free(fmt);
+               }
+               list_del(&node->list);
+               free(node);
+       }
 }
 
 static int hists_evsel__init(struct perf_evsel *evsel)
 {
        struct hists *hists = evsel__hists(evsel);
 
-       __hists__init(hists);
+       __hists__init(hists, &perf_hpp_list);
        return 0;
 }
 
@@ -1649,3 +2300,9 @@ int hists__init(void)
 
        return err;
 }
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+       INIT_LIST_HEAD(&list->fields);
+       INIT_LIST_HEAD(&list->sorts);
+}
index d4ec4822a1038611a7269aa0df2eb37171a647a6..ead18c82294faf881f1d4bc89be653e27ff5c708 100644 (file)
@@ -66,6 +66,8 @@ struct hists {
        struct rb_root          entries_collapsed;
        u64                     nr_entries;
        u64                     nr_non_filtered_entries;
+       u64                     callchain_period;
+       u64                     callchain_non_filtered_period;
        struct thread           *thread_filter;
        const struct dso        *dso_filter;
        const char              *uid_filter_str;
@@ -75,6 +77,9 @@ struct hists {
        u64                     event_stream;
        u16                     col_len[HISTC_NR_COLS];
        int                     socket_filter;
+       struct perf_hpp_list    *hpp_list;
+       struct list_head        hpp_formats;
+       int                     nr_hpp_node;
 };
 
 struct hist_entry_iter;
@@ -121,15 +126,21 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
                         int max_stack_depth, void *arg);
 
+struct perf_hpp;
+struct perf_hpp_fmt;
+
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
                              struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+                                  struct perf_hpp_fmt *fmt, int printed);
 void hist_entry__delete(struct hist_entry *he);
 
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
-void hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
 
 void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
 void hists__delete_entries(struct hists *hists);
@@ -185,10 +196,10 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
 }
 
 int hists__init(void);
-int __hists__init(struct hists *hists);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-bool hists__collapse_insert_entry(struct hists *hists __maybe_unused,
+int hists__collapse_insert_entry(struct hists *hists,
                                  struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
@@ -214,28 +225,64 @@ struct perf_hpp_fmt {
                            struct hist_entry *a, struct hist_entry *b);
        int64_t (*sort)(struct perf_hpp_fmt *fmt,
                        struct hist_entry *a, struct hist_entry *b);
+       bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+       void (*free)(struct perf_hpp_fmt *fmt);
 
        struct list_head list;
        struct list_head sort_list;
        bool elide;
        int len;
        int user_len;
+       int idx;
+       int level;
+};
+
+struct perf_hpp_list {
+       struct list_head fields;
+       struct list_head sorts;
 };
 
-extern struct list_head perf_hpp__list;
-extern struct list_head perf_hpp__sort_list;
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+       struct list_head        list;
+       struct perf_hpp_list    hpp;
+       int                     level;
+       bool                    skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+                                   struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+                                       struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+       perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+       perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+       list_for_each_entry(format, &(_list)->fields, list)
 
-#define perf_hpp__for_each_format(format) \
-       list_for_each_entry(format, &perf_hpp__list, list)
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp)        \
+       list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
 
-#define perf_hpp__for_each_format_safe(format, tmp)    \
-       list_for_each_entry_safe(format, tmp, &perf_hpp__list, list)
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+       list_for_each_entry(format, &(_list)->sorts, sort_list)
 
-#define perf_hpp__for_each_sort_list(format) \
-       list_for_each_entry(format, &perf_hpp__sort_list, sort_list)
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp)     \
+       list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
 
-#define perf_hpp__for_each_sort_list_safe(format, tmp) \
-       list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list)
+#define hists__for_each_format(hists, format) \
+       perf_hpp_list__for_each_format((hists)->hpp_list, fmt)
+
+#define hists__for_each_sort_list(hists, format) \
+       perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt)
 
 extern struct perf_hpp_fmt perf_hpp__format[];
 
@@ -254,21 +301,29 @@ enum {
 };
 
 void perf_hpp__init(void);
-void perf_hpp__column_register(struct perf_hpp_fmt *format);
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format);
-void perf_hpp__column_enable(unsigned col);
-void perf_hpp__column_disable(unsigned col);
 void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+                                 struct perf_evlist *evlist);
 
-void perf_hpp__register_sort_field(struct perf_hpp_fmt *format);
-void perf_hpp__setup_output_field(void);
-void perf_hpp__reset_output_field(void);
-void perf_hpp__append_sort_keys(void);
 
 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
 bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
 bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
 
 static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
                                         struct hists *hists)
@@ -372,6 +427,7 @@ static inline int script_browse(const char *script_opt __maybe_unused)
 #endif
 
 unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
 
 void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
                          struct perf_sample *sample, bool nonany_branch_mode);
@@ -381,4 +437,26 @@ int parse_filter_percentage(const struct option *opt __maybe_unused,
                            const char *arg, int unset __maybe_unused);
 int perf_hist_config(const char *var, const char *value);
 
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+       HMD_NORMAL,
+       HMD_FORCE_SIBLING,
+       HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+                                   enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+       return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT  3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+
 #endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644 (file)
index 0000000..a1e99da
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+extern int jit_process(struct perf_session *session,
+                      struct perf_data_file *output,
+                      struct machine *machine,
+                      char *filename,
+                      pid_t pid,
+                      u64 *nbytes);
+
+extern int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644 (file)
index 0000000..cd272cc
--- /dev/null
@@ -0,0 +1,697 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include "strlist.h"
+#include <elf.h>
+
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+struct jit_buf_desc {
+       struct perf_data_file *output;
+       struct perf_session *session;
+       struct machine *machine;
+       union jr_entry   *entry;
+       void             *buf;
+       uint64_t         sample_type;
+       size_t           bufsize;
+       FILE             *in;
+       bool             needs_bswap; /* handles cross-endianness */
+       void             *debug_data;
+       size_t           nr_debug_entries;
+       uint32_t         code_load_count;
+       u64              bytes_written;
+       struct rb_root   code_root;
+       char             dir[PATH_MAX];
+};
+
+struct debug_line_info {
+       unsigned long vma;
+       unsigned int lineno;
+       /* The filename format is unspecified: absolute path, relative path, etc. */
+       char const filename[0];
+};
+
+struct jit_tool {
+       struct perf_tool tool;
+       struct perf_data_file   output;
+       struct perf_data_file   input;
+       u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(char *filename,
+            const char *sym,
+            uint64_t code_addr,
+            const void *code,
+            int csize,
+            void *debug,
+            int nr_debug_entries)
+{
+       int ret, fd;
+
+       if (verbose > 0)
+               fprintf(stderr, "write ELF image %s\n", filename);
+
+       fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+       if (fd == -1) {
+               pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+               return -1;
+       }
+
+        ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries);
+
+        close(fd);
+
+        if (ret)
+                unlink(filename);
+
+       return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+       if (!(jd && jd->in))
+               return;
+       funlockfile(jd->in);
+       fclose(jd->in);
+       jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+       struct perf_evsel *evsel;
+
+       /*
+        * check that all events use CLOCK_MONOTONIC
+        */
+       evlist__for_each(session->evlist, evsel) {
+               if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
+                       return -1;
+       }
+       return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+       struct jitheader header;
+       struct jr_prefix *prefix;
+       ssize_t bs, bsz = 0;
+       void *n, *buf = NULL;
+       int ret, retval = -1;
+
+       jd->in = fopen(name, "r");
+       if (!jd->in)
+               return -1;
+
+       bsz = hmax(sizeof(header), sizeof(*prefix));
+
+       buf = malloc(bsz);
+       if (!buf)
+               goto error;
+
+       /*
+        * protect from writer modifying the file while we are reading it
+        */
+       flockfile(jd->in);
+
+       ret = fread(buf, sizeof(header), 1, jd->in);
+       if (ret != 1)
+               goto error;
+
+       memcpy(&header, buf, sizeof(header));
+
+       if (header.magic != JITHEADER_MAGIC) {
+               if (header.magic != JITHEADER_MAGIC_SW)
+                       goto error;
+               jd->needs_bswap = true;
+       }
+
+       if (jd->needs_bswap) {
+               header.version    = bswap_32(header.version);
+               header.total_size = bswap_32(header.total_size);
+               header.pid        = bswap_32(header.pid);
+               header.elf_mach   = bswap_32(header.elf_mach);
+               header.timestamp  = bswap_64(header.timestamp);
+               header.flags      = bswap_64(header.flags);
+       }
+
+       if (verbose > 2)
+               pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+                       header.version,
+                       header.total_size,
+                       (unsigned long long)header.timestamp,
+                       header.pid,
+                       header.elf_mach);
+
+       if (header.flags & JITDUMP_FLAGS_RESERVED) {
+               pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+                      (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+               goto error;
+       }
+
+       /*
+        * validate event is using the correct clockid
+        */
+       if (jit_validate_events(jd->session)) {
+               pr_err("error, jitted code must be sampled with perf record -k 1\n");
+               goto error;
+       }
+
+       bs = header.total_size - sizeof(header);
+
+       if (bs > bsz) {
+               n = realloc(buf, bs);
+               if (!n)
+                       goto error;
+               bsz = bs;
+               buf = n;
+               /* consume the extra header bytes we do not know about */
+               ret = fread(buf, bs, 1, jd->in);
+               if (ret != 1)
+                       goto error;
+       }
+       /*
+        * keep dirname for generating files and mmap records
+        */
+       strcpy(jd->dir, name);
+       dirname(jd->dir);
+
+       return 0;
+error:
+       funlockfile(jd->in);
+       fclose(jd->in);
+       return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+       struct jr_prefix *prefix;
+       union jr_entry *jr;
+       void *addr;
+       size_t bs, size;
+       int id, ret;
+
+       if (!(jd && jd->in))
+               return NULL;
+
+       if (jd->buf == NULL) {
+               size_t sz = getpagesize();
+               if (sz < sizeof(*prefix))
+                       sz = sizeof(*prefix);
+
+               jd->buf = malloc(sz);
+               if (jd->buf == NULL)
+                       return NULL;
+
+               jd->bufsize = sz;
+       }
+
+       prefix = jd->buf;
+
+       /*
+        * file is still locked at this point
+        */
+       ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+       if (ret  != 1)
+               return NULL;
+
+       if (jd->needs_bswap) {
+               prefix->id         = bswap_32(prefix->id);
+               prefix->total_size = bswap_32(prefix->total_size);
+               prefix->timestamp  = bswap_64(prefix->timestamp);
+       }
+       id   = prefix->id;
+       size = prefix->total_size;
+
+       bs = (size_t)size;
+       if (bs < sizeof(*prefix))
+               return NULL;
+
+       if (id >= JIT_CODE_MAX) {
+               pr_warning("next_entry: unknown prefix %d, skipping\n", id);
+               return NULL;
+       }
+       if (bs > jd->bufsize) {
+               void *n;
+               n = realloc(jd->buf, bs);
+               if (!n)
+                       return NULL;
+               jd->buf = n;
+               jd->bufsize = bs;
+       }
+
+       addr = ((void *)jd->buf) + sizeof(*prefix);
+
+       ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+       if (ret != 1)
+               return NULL;
+
+       jr = (union jr_entry *)jd->buf;
+
+       switch(id) {
+       case JIT_CODE_DEBUG_INFO:
+               if (jd->needs_bswap) {
+                       uint64_t n;
+                       jr->info.code_addr = bswap_64(jr->info.code_addr);
+                       jr->info.nr_entry  = bswap_64(jr->info.nr_entry);
+                       for (n = 0 ; n < jr->info.nr_entry; n++) {
+                               jr->info.entries[n].addr    = bswap_64(jr->info.entries[n].addr);
+                               jr->info.entries[n].lineno  = bswap_32(jr->info.entries[n].lineno);
+                               jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+                       }
+               }
+               break;
+       case JIT_CODE_CLOSE:
+               break;
+       case JIT_CODE_LOAD:
+               if (jd->needs_bswap) {
+                       jr->load.pid       = bswap_32(jr->load.pid);
+                       jr->load.tid       = bswap_32(jr->load.tid);
+                       jr->load.vma       = bswap_64(jr->load.vma);
+                       jr->load.code_addr = bswap_64(jr->load.code_addr);
+                       jr->load.code_size = bswap_64(jr->load.code_size);
+                       jr->load.code_index= bswap_64(jr->load.code_index);
+               }
+               jd->code_load_count++;
+               break;
+       case JIT_CODE_MOVE:
+               if (jd->needs_bswap) {
+                       jr->move.pid           = bswap_32(jr->move.pid);
+                       jr->move.tid           = bswap_32(jr->move.tid);
+                       jr->move.vma           = bswap_64(jr->move.vma);
+                       jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+                       jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+                       jr->move.code_size     = bswap_64(jr->move.code_size);
+                       jr->move.code_index    = bswap_64(jr->move.code_index);
+               }
+               break;
+       case JIT_CODE_MAX:
+       default:
+               return NULL;
+       }
+       return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+       ssize_t size;
+
+       size = perf_data_file__write(jd->output, event, event->header.size);
+       if (size < 0)
+               return -1;
+
+       jd->bytes_written += size;
+       return 0;
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       struct perf_sample sample;
+       union perf_event *event;
+       struct perf_tool *tool = jd->session->tool;
+       uint64_t code, addr;
+       uintptr_t uaddr;
+       char *filename;
+       struct stat st;
+       size_t size;
+       u16 idr_size;
+       const char *sym;
+       uint32_t count;
+       int ret, csize;
+       pid_t pid, tid;
+       struct {
+               u32 pid, tid;
+               u64 time;
+       } *id;
+
+       pid   = jr->load.pid;
+       tid   = jr->load.tid;
+       csize = jr->load.code_size;
+       addr  = jr->load.code_addr;
+       sym   = (void *)((unsigned long)jr + sizeof(jr->load));
+       code  = (unsigned long)jr + jr->load.p.total_size - csize;
+       count = jr->load.code_index;
+       idr_size = jd->machine->id_hdr_size;
+
+       event = calloc(1, sizeof(*event) + idr_size);
+       if (!event)
+               return -1;
+
+       filename = event->mmap2.filename;
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+                       jd->dir,
+                       pid,
+                       count);
+
+       size++; /* for \0 */
+
+       size = PERF_ALIGN(size, sizeof(u64));
+       uaddr = (uintptr_t)code;
+       ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries);
+
+       if (jd->debug_data && jd->nr_debug_entries) {
+               free(jd->debug_data);
+               jd->debug_data = NULL;
+               jd->nr_debug_entries = 0;
+       }
+
+       if (ret) {
+               free(event);
+               return -1;
+       }
+       if (stat(filename, &st))
+               memset(&st, 0, sizeof(st));
+
+       event->mmap2.header.type = PERF_RECORD_MMAP2;
+       event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+       event->mmap2.header.size = (sizeof(event->mmap2) -
+                       (sizeof(event->mmap2.filename) - size) + idr_size);
+
+       event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+       event->mmap2.start = addr;
+       event->mmap2.len   = csize;
+       event->mmap2.pid   = pid;
+       event->mmap2.tid   = tid;
+       event->mmap2.ino   = st.st_ino;
+       event->mmap2.maj   = major(st.st_dev);
+       event->mmap2.min   = minor(st.st_dev);
+       event->mmap2.prot  = st.st_mode;
+       event->mmap2.flags = MAP_SHARED;
+       event->mmap2.ino_generation = 1;
+
+       id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+       if (jd->sample_type & PERF_SAMPLE_TID) {
+               id->pid  = pid;
+               id->tid  = tid;
+       }
+       if (jd->sample_type & PERF_SAMPLE_TIME)
+               id->time = jr->load.p.timestamp;
+
+       /*
+        * create pseudo sample to induce dso hit increment
+        * use first address as sample address
+        */
+       memset(&sample, 0, sizeof(sample));
+       sample.pid  = pid;
+       sample.tid  = tid;
+       sample.time = id->time;
+       sample.ip   = addr;
+
+       ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+       if (ret)
+               return ret;
+
+       ret = jit_inject_event(jd, event);
+       /*
+        * mark dso as used to generate the buildid in the header
+        */
+       if (!ret)
+               build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+       return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       struct perf_sample sample;
+       union perf_event *event;
+       struct perf_tool *tool = jd->session->tool;
+       char *filename;
+       size_t size;
+       struct stat st;
+       u16 idr_size;
+       int ret;
+       pid_t pid, tid;
+       struct {
+               u32 pid, tid;
+               u64 time;
+       } *id;
+
+       pid = jr->move.pid;
+       tid =  jr->move.tid;
+       idr_size = jd->machine->id_hdr_size;
+
+       /*
+        * +16 to account for sample_id_all (hack)
+        */
+       event = calloc(1, sizeof(*event) + 16);
+       if (!event)
+               return -1;
+
+       filename = event->mmap2.filename;
+       size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+                jd->dir,
+                pid,
+                jr->move.code_index);
+
+       size++; /* for \0 */
+
+       if (stat(filename, &st))
+               memset(&st, 0, sizeof(st));
+
+       size = PERF_ALIGN(size, sizeof(u64));
+
+       event->mmap2.header.type = PERF_RECORD_MMAP2;
+       event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+       event->mmap2.header.size = (sizeof(event->mmap2) -
+                       (sizeof(event->mmap2.filename) - size) + idr_size);
+       event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+       event->mmap2.start = jr->move.new_code_addr;
+       event->mmap2.len   = jr->move.code_size;
+       event->mmap2.pid   = pid;
+       event->mmap2.tid   = tid;
+       event->mmap2.ino   = st.st_ino;
+       event->mmap2.maj   = major(st.st_dev);
+       event->mmap2.min   = minor(st.st_dev);
+       event->mmap2.prot  = st.st_mode;
+       event->mmap2.flags = MAP_SHARED;
+       event->mmap2.ino_generation = 1;
+
+       id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+       if (jd->sample_type & PERF_SAMPLE_TID) {
+               id->pid  = pid;
+               id->tid  = tid;
+       }
+       if (jd->sample_type & PERF_SAMPLE_TIME)
+               id->time = jr->load.p.timestamp;
+
+       /*
+        * create pseudo sample to induce dso hit increment
+        * use first address as sample address
+        */
+       memset(&sample, 0, sizeof(sample));
+       sample.pid  = pid;
+       sample.tid  = tid;
+       sample.time = id->time;
+       sample.ip   = jr->move.new_code_addr;
+
+       ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+       if (ret)
+               return ret;
+
+       ret = jit_inject_event(jd, event);
+       if (!ret)
+               build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+       return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+       void *data;
+       size_t sz;
+
+       if (!(jd && jr))
+               return -1;
+
+       sz  = jr->prefix.total_size - sizeof(jr->info);
+       data = malloc(sz);
+       if (!data)
+               return -1;
+
+       memcpy(data, &jr->info.entries, sz);
+
+       jd->debug_data       = data;
+
+       /*
+        * we must use nr_entry instead of size here because
+        * we cannot distinguish actual entries from padding otherwise
+        */
+       jd->nr_debug_entries = jr->info.nr_entry;
+
+       return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+       union jr_entry *jr;
+       int ret = 0;
+
+       while ((jr = jit_get_next_entry(jd))) {
+               switch(jr->prefix.id) {
+               case JIT_CODE_LOAD:
+                       ret = jit_repipe_code_load(jd, jr);
+                       break;
+               case JIT_CODE_MOVE:
+                       ret = jit_repipe_code_move(jd, jr);
+                       break;
+               case JIT_CODE_DEBUG_INFO:
+                       ret = jit_repipe_debug_info(jd, jr);
+                       break;
+               default:
+                       ret = 0;
+                       continue;
+               }
+       }
+       return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+       int ret;
+
+       if (verbose > 0)
+               fprintf(stderr, "injecting: %s\n", path);
+
+       ret = jit_open(jd, path);
+       if (ret)
+               return -1;
+
+       ret = jit_process_dump(jd);
+
+       jit_close(jd);
+
+       if (verbose > 0)
+               fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+       return 0;
+}
+
+/*
+ * The file must be named with the pattern .../jit-XXXX.dump
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+ {
+       char *p;
+       char *end = NULL;
+       pid_t pid2;
+
+       if (verbose > 2)
+               fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+       /*
+        * get file name
+        */
+       p = strrchr(mmap_name, '/');
+       if (!p)
+               return -1;
+
+       /*
+        * match prefix
+        */
+       if (strncmp(p, "/jit-", 5))
+               return -1;
+
+       /*
+        * skip prefix
+        */
+       p += 5;
+
+       /*
+        * must be followed by a pid
+        */
+       if (!isdigit(*p))
+               return -1;
+
+       pid2 = (int)strtol(p, &end, 10);
+       if (!end)
+               return -1;
+
+       /*
+        * pid does not match mmap pid
+        * pid==0 in system-wide mode (synthesized)
+        */
+       if (pid && pid2 != pid)
+               return -1;
+       /*
+        * validate suffix
+        */
+       if (strcmp(end, ".dump"))
+               return -1;
+
+       if (verbose > 0)
+               fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+       return 0;
+}
+
+int
+jit_process(struct perf_session *session,
+           struct perf_data_file *output,
+           struct machine *machine,
+           char *filename,
+           pid_t pid,
+           u64 *nbytes)
+{
+       struct perf_evsel *first;
+       struct jit_buf_desc jd;
+       int ret;
+
+       /*
+        * first, detect marker mmap (i.e., the jitdump mmap)
+        */
+       if (jit_detect(filename, pid))
+               return 0;
+
+       memset(&jd, 0, sizeof(jd));
+
+       jd.session = session;
+       jd.output  = output;
+       jd.machine = machine;
+
+       /*
+        * track sample_type to compute id_all layout
+        * perf sets the same sample type to all events as of now
+        */
+       first = perf_evlist__first(session->evlist);
+       jd.sample_type = first->attr.sample_type;
+
+       *nbytes = 0;
+
+       ret = jit_inject(&jd, filename);
+       if (!ret) {
+               *nbytes = jd.bytes_written;
+               ret = 1;
+       }
+
+       return ret;
+}
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644 (file)
index 0000000..b66c1f5
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jitdump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC                0x4A695444
+#define JITHEADER_MAGIC_SW     0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+       JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+                               (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+       uint32_t magic;         /* characters "JiTD" */
+       uint32_t version;       /* header version */
+       uint32_t total_size;    /* total size of header */
+       uint32_t elf_mach;      /* elf mach target */
+       uint32_t pad1;          /* reserved */
+       uint32_t pid;           /* JIT process id */
+       uint64_t timestamp;     /* timestamp */
+       uint64_t flags;         /* flags */
+};
+
+enum jit_record_type {
+       JIT_CODE_LOAD           = 0,
+        JIT_CODE_MOVE           = 1,
+       JIT_CODE_DEBUG_INFO     = 2,
+       JIT_CODE_CLOSE          = 3,
+
+       JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+       uint32_t id;
+       uint32_t total_size;
+       uint64_t timestamp;
+};
+
+struct jr_code_load {
+       struct jr_prefix p;
+
+       uint32_t pid;
+       uint32_t tid;
+       uint64_t vma;
+       uint64_t code_addr;
+       uint64_t code_size;
+       uint64_t code_index;
+};
+
+struct jr_code_close {
+       struct jr_prefix p;
+};
+
+struct jr_code_move {
+       struct jr_prefix p;
+
+       uint32_t pid;
+       uint32_t tid;
+       uint64_t vma;
+       uint64_t old_code_addr;
+       uint64_t new_code_addr;
+       uint64_t code_size;
+       uint64_t code_index;
+};
+
+struct debug_entry {
+       uint64_t addr;
+       int lineno;         /* source line number starting at 1 */
+       int discrim;        /* column discriminator, 0 is default */
+       const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+       struct jr_prefix p;
+
+       uint64_t code_addr;
+       uint64_t nr_entry;
+       struct debug_entry entries[0];
+};
+
+union jr_entry {
+        struct jr_code_debug_info info;
+        struct jr_code_close close;
+        struct jr_code_load load;
+        struct jr_code_move move;
+        struct jr_prefix prefix;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+       void *a = ent + 1;
+       size_t l = strlen(ent->name) + 1;
+       return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+       void *a = ent + 1;
+       return a;
+}
+
+#endif /* !JITDUMP_H */
index ae825d4ec110fcfe6a06ef5e4daf50b03a214a87..d01e73592f6e34347f0c26531b58aa6ce1d57d52 100644 (file)
@@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm,
 
 bool kvm_exit_event(struct perf_evsel *evsel);
 bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
 
 #define define_exit_reasons_table(name, symbols)       \
        static struct exit_reasons_table name[] = {     \
@@ -133,8 +134,13 @@ bool kvm_entry_event(struct perf_evsel *evsel);
  */
 int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
 
-extern const char * const kvm_events_tp[];
+extern const char *kvm_events_tp[];
 extern struct kvm_reg_events_ops kvm_reg_events_ops[];
 extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
 
 #endif /* __PERF_KVM_STAT_H */
index 2c2b443df5ba796c43ee12c6c0f2061b5d7c7b80..1a3e45baf97fb575c02afe49707727aff0f628ec 100644 (file)
@@ -179,6 +179,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine,
                                       mapp, filter);
 }
 
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+                                                  enum map_type type, const char *name,
+                                                  struct map **mapp,
+                                                  symbol_filter_t filter)
+{
+       return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
+                                              mapp, filter);
+}
+
 static inline
 struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
                                             struct map **mapp,
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644 (file)
index 0000000..75465f8
--- /dev/null
@@ -0,0 +1,255 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "cpu/mem-loads,ldlat=30/P",     "mem-loads"),
+       E("ldlat-stores",       "cpu/mem-stores/P",             "mem-stores"),
+};
+#undef E
+
+char *perf_mem_events__name(int i)
+{
+       return (char *)perf_mem_events[i].name;
+}
+
+int perf_mem_events__parse(const char *str)
+{
+       char *tok, *saveptr = NULL;
+       bool found = false;
+       char *buf;
+       int j;
+
+       /* We need a buffer that we know we can write to. */
+       buf = malloc(strlen(str) + 1);
+       if (!buf)
+               return -ENOMEM;
+
+       strcpy(buf, str);
+
+       tok = strtok_r((char *)buf, ",", &saveptr);
+
+       while (tok) {
+               for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+                       struct perf_mem_event *e = &perf_mem_events[j];
+
+                       if (strstr(e->tag, tok))
+                               e->record = found = true;
+               }
+
+               tok = strtok_r(NULL, ",", &saveptr);
+       }
+
+       free(buf);
+
+       if (found)
+               return 0;
+
+       pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+       return -1;
+}
+
+int perf_mem_events__init(void)
+{
+       const char *mnt = sysfs__mount();
+       bool found = false;
+       int j;
+
+       if (!mnt)
+               return -ENOENT;
+
+       for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+               char path[PATH_MAX];
+               struct perf_mem_event *e = &perf_mem_events[j];
+               struct stat st;
+
+               scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+                         mnt, e->sysfs_name);
+
+               if (!stat(path, &st))
+                       e->supported = found = true;
+       }
+
+       return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+       "N/A",
+       "HIT",
+       "MISS",
+       "L1",
+       "L2",
+       "Walker",
+       "Fault",
+};
+
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t l = 0, i;
+       u64 m = PERF_MEM_TLB_NA;
+       u64 hit, miss;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       if (mem_info)
+               m = mem_info->data_src.mem_dtlb;
+
+       hit = m & PERF_MEM_TLB_HIT;
+       miss = m & PERF_MEM_TLB_MISS;
+
+       /* already taken care of */
+       m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+       for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, tlb_access[i]);
+       }
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+       if (hit)
+               l += scnprintf(out + l, sz - l, " hit");
+       if (miss)
+               l += scnprintf(out + l, sz - l, " miss");
+
+       return l;
+}
+
+static const char * const mem_lvl[] = {
+       "N/A",
+       "HIT",
+       "MISS",
+       "L1",
+       "LFB",
+       "L2",
+       "L3",
+       "Local RAM",
+       "Remote RAM (1 hop)",
+       "Remote RAM (2 hops)",
+       "Remote Cache (1 hop)",
+       "Remote Cache (2 hops)",
+       "I/O",
+       "Uncached",
+};
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t i, l = 0;
+       u64 m =  PERF_MEM_LVL_NA;
+       u64 hit, miss;
+
+       if (mem_info)
+               m  = mem_info->data_src.mem_lvl;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       hit = m & PERF_MEM_LVL_HIT;
+       miss = m & PERF_MEM_LVL_MISS;
+
+       /* already taken care of */
+       m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+       for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, mem_lvl[i]);
+       }
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+       if (hit)
+               l += scnprintf(out + l, sz - l, " hit");
+       if (miss)
+               l += scnprintf(out + l, sz - l, " miss");
+
+       return l;
+}
+
+static const char * const snoop_access[] = {
+       "N/A",
+       "None",
+       "Miss",
+       "Hit",
+       "HitM",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       size_t i, l = 0;
+       u64 m = PERF_MEM_SNOOP_NA;
+
+       sz -= 1; /* -1 for null termination */
+       out[0] = '\0';
+
+       if (mem_info)
+               m = mem_info->data_src.mem_snoop;
+
+       for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+               if (!(m & 0x1))
+                       continue;
+               if (l) {
+                       strcat(out, " or ");
+                       l += 4;
+               }
+               l += scnprintf(out + l, sz - l, snoop_access[i]);
+       }
+
+       if (*out == '\0')
+               l += scnprintf(out, sz - l, "N/A");
+
+       return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       u64 mask = PERF_MEM_LOCK_NA;
+       int l;
+
+       if (mem_info)
+               mask = mem_info->data_src.mem_lock;
+
+       if (mask & PERF_MEM_LOCK_NA)
+               l = scnprintf(out, sz, "N/A");
+       else if (mask & PERF_MEM_LOCK_LOCKED)
+               l = scnprintf(out, sz, "Yes");
+       else
+               l = scnprintf(out, sz, "No");
+
+       return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+       int i = 0;
+
+       i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+       i += scnprintf(out + i, sz - i, "|SNP ");
+       i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+       i += scnprintf(out + i, sz - i, "|TLB ");
+       i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+       i += scnprintf(out + i, sz - i, "|LCK ");
+       i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+       return i;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644 (file)
index 0000000..5d6d930
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+
+struct perf_mem_event {
+       bool            record;
+       bool            supported;
+       const char      *tag;
+       const char      *name;
+       const char      *sysfs_name;
+};
+
+enum {
+       PERF_MEM_EVENTS__LOAD,
+       PERF_MEM_EVENTS__STORE,
+       PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+#endif /* __PERF_MEM_EVENTS_H */
index 813d9b272c813b0dd749470f6209aa3a0a0607b4..4c19d5e79d8c4d626eb3fa91486cc1d83447aeeb 100644 (file)
@@ -279,7 +279,24 @@ const char *event_type(int type)
        return "unknown";
 }
 
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+       return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
 
+static char *get_config_name(struct list_head *head_terms)
+{
+       struct parse_events_term *term;
+
+       if (!head_terms)
+               return NULL;
+
+       list_for_each_entry(term, head_terms, list)
+               if (parse_events__is_name_term(term))
+                       return term->val.str;
+
+       return NULL;
+}
 
 static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
@@ -333,11 +350,25 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES]
        return -1;
 }
 
+typedef int config_term_func_t(struct perf_event_attr *attr,
+                              struct parse_events_term *term,
+                              struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+                             struct parse_events_term *term,
+                             struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+                      struct list_head *head,
+                      struct parse_events_error *err,
+                      config_term_func_t config_term);
+
 int parse_events_add_cache(struct list_head *list, int *idx,
-                          char *type, char *op_result1, char *op_result2)
+                          char *type, char *op_result1, char *op_result2,
+                          struct parse_events_error *err,
+                          struct list_head *head_config)
 {
        struct perf_event_attr attr;
-       char name[MAX_NAME_LEN];
+       LIST_HEAD(config_terms);
+       char name[MAX_NAME_LEN], *config_name;
        int cache_type = -1, cache_op = -1, cache_result = -1;
        char *op_result[2] = { op_result1, op_result2 };
        int i, n;
@@ -351,6 +382,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
        if (cache_type == -1)
                return -EINVAL;
 
+       config_name = get_config_name(head_config);
        n = snprintf(name, MAX_NAME_LEN, "%s", type);
 
        for (i = 0; (i < 2) && (op_result[i]); i++) {
@@ -391,7 +423,16 @@ int parse_events_add_cache(struct list_head *list, int *idx,
        memset(&attr, 0, sizeof(attr));
        attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
        attr.type = PERF_TYPE_HW_CACHE;
-       return add_event(list, idx, &attr, name, NULL);
+
+       if (head_config) {
+               if (config_attr(&attr, head_config, err,
+                               config_term_common))
+                       return -EINVAL;
+
+               if (get_config_terms(head_config, &config_terms))
+                       return -ENOMEM;
+       }
+       return add_event(list, idx, &attr, config_name ? : name, &config_terms);
 }
 
 static void tracepoint_error(struct parse_events_error *e, int err,
@@ -540,6 +581,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 struct __add_bpf_event_param {
        struct parse_events_evlist *data;
        struct list_head *list;
+       struct list_head *head_config;
 };
 
 static int add_bpf_event(struct probe_trace_event *tev, int fd,
@@ -556,7 +598,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
                 tev->group, tev->event, fd);
 
        err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group,
-                                         tev->event, evlist->error, NULL);
+                                         tev->event, evlist->error,
+                                         param->head_config);
        if (err) {
                struct perf_evsel *evsel, *tmp;
 
@@ -581,11 +624,12 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd,
 
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
                              struct list_head *list,
-                             struct bpf_object *obj)
+                             struct bpf_object *obj,
+                             struct list_head *head_config)
 {
        int err;
        char errbuf[BUFSIZ];
-       struct __add_bpf_event_param param = {data, list};
+       struct __add_bpf_event_param param = {data, list, head_config};
        static bool registered_unprobe_atexit = false;
 
        if (IS_ERR(obj) || !obj) {
@@ -631,17 +675,99 @@ errout:
        return err;
 }
 
+static int
+parse_events_config_bpf(struct parse_events_evlist *data,
+                       struct bpf_object *obj,
+                       struct list_head *head_config)
+{
+       struct parse_events_term *term;
+       int error_pos;
+
+       if (!head_config || list_empty(head_config))
+               return 0;
+
+       list_for_each_entry(term, head_config, list) {
+               char errbuf[BUFSIZ];
+               int err;
+
+               if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+                       snprintf(errbuf, sizeof(errbuf),
+                                "Invalid config term for BPF object");
+                       errbuf[BUFSIZ - 1] = '\0';
+
+                       data->error->idx = term->err_term;
+                       data->error->str = strdup(errbuf);
+                       return -EINVAL;
+               }
+
+               err = bpf__config_obj(obj, term, data->evlist, &error_pos);
+               if (err) {
+                       bpf__strerror_config_obj(obj, term, data->evlist,
+                                                &error_pos, err, errbuf,
+                                                sizeof(errbuf));
+                       data->error->help = strdup(
+"Hint:\tValid config terms:\n"
+"     \tmap:[<arraymap>].value<indices>=[value]\n"
+"     \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+"     \twhere <indices> is something like [0,3...5] or [all]\n"
+"     \t(add -v to see detail)");
+                       data->error->str = strdup(errbuf);
+                       if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+                               data->error->idx = term->err_val;
+                       else
+                               data->error->idx = term->err_term + error_pos;
+                       return err;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is 'evt config', which should be applied to each
+ *  event in bpf.c.
+ * 'map:array.value[0]=1' is 'obj config', should be processed
+ * with parse_events_config_bpf.
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+                      struct list_head *obj_head_config)
+{
+       struct parse_events_term *term, *temp;
+
+       /*
+        * Currently, all possible user config terms
+        * belong to the bpf object. parse_events__is_hardcoded_term()
+        * happens to be a good flag.
+        *
+        * See parse_events_config_bpf() and
+        * config_term_tracepoint().
+        */
+       list_for_each_entry_safe(term, temp, evt_head_config, list)
+               if (!parse_events__is_hardcoded_term(term))
+                       list_move_tail(&term->list, obj_head_config);
+}
+
 int parse_events_load_bpf(struct parse_events_evlist *data,
                          struct list_head *list,
                          char *bpf_file_name,
-                         bool source)
+                         bool source,
+                         struct list_head *head_config)
 {
+       int err;
        struct bpf_object *obj;
+       LIST_HEAD(obj_head_config);
+
+       if (head_config)
+               split_bpf_config_terms(head_config, &obj_head_config);
 
        obj = bpf__prepare_load(bpf_file_name, source);
        if (IS_ERR(obj)) {
                char errbuf[BUFSIZ];
-               int err;
 
                err = PTR_ERR(obj);
 
@@ -659,7 +785,18 @@ int parse_events_load_bpf(struct parse_events_evlist *data,
                return err;
        }
 
-       return parse_events_load_bpf_obj(data, list, obj);
+       err = parse_events_load_bpf_obj(data, list, obj, head_config);
+       if (err)
+               return err;
+       err = parse_events_config_bpf(data, obj, &obj_head_config);
+
+       /*
+        * Caller doesn't know anything about obj_head_config,
+        * so combine them again before returning.
+        */
+       if (head_config)
+               list_splice_tail(&obj_head_config, head_config);
+       return err;
 }
 
 static int
@@ -746,9 +883,59 @@ static int check_type_val(struct parse_events_term *term,
        return -EINVAL;
 }
 
-typedef int config_term_func_t(struct perf_event_attr *attr,
-                              struct parse_events_term *term,
-                              struct parse_events_error *err);
+/*
+ * Update according to parse-events.l
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+       [PARSE_EVENTS__TERM_TYPE_USER]                  = "<sysfs term>",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG]                = "config",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG1]               = "config1",
+       [PARSE_EVENTS__TERM_TYPE_CONFIG2]               = "config2",
+       [PARSE_EVENTS__TERM_TYPE_NAME]                  = "name",
+       [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]         = "period",
+       [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]           = "freq",
+       [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]    = "branch_type",
+       [PARSE_EVENTS__TERM_TYPE_TIME]                  = "time",
+       [PARSE_EVENTS__TERM_TYPE_CALLGRAPH]             = "call-graph",
+       [PARSE_EVENTS__TERM_TYPE_STACKSIZE]             = "stack-size",
+       [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
+       [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+       if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+               err->str = strdup("Invalid term_type");
+               return false;
+       }
+       if (!config_term_shrinked)
+               return true;
+
+       switch (term_type) {
+       case PARSE_EVENTS__TERM_TYPE_CONFIG:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+       case PARSE_EVENTS__TERM_TYPE_NAME:
+               return true;
+       default:
+               if (!err)
+                       return false;
+
+               /* term_type is validated so indexing is safe */
+               if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+                            config_term_names[term_type]) < 0)
+                       err->str = NULL;
+               return false;
+       }
+}
+
+void parse_events__shrink_config_terms(void)
+{
+       config_term_shrinked = true;
+}
 
 static int config_term_common(struct perf_event_attr *attr,
                              struct parse_events_term *term,
@@ -815,6 +1002,17 @@ do {                                                                         \
                return -EINVAL;
        }
 
+       /*
+        * Check term availability after basic checking so
+        * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+        *
+        * If we checked availability at the entry of this function,
+        * the user would see "'<sysfs term>' is not usable in 'perf stat'"
+        * if an invalid config term is provided for legacy events
+        * (for example, instructions/badterm/...), which is confusing.
+        */
+       if (!config_term_avail(term->type_term, err))
+               return -EINVAL;
        return 0;
 #undef CHECK_TYPE_VAL
 }
@@ -961,23 +1159,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data,
                        return -ENOMEM;
        }
 
-       return add_event(list, &data->idx, &attr, NULL, &config_terms);
-}
-
-static int parse_events__is_name_term(struct parse_events_term *term)
-{
-       return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
-}
-
-static char *pmu_event_name(struct list_head *head_terms)
-{
-       struct parse_events_term *term;
-
-       list_for_each_entry(term, head_terms, list)
-               if (parse_events__is_name_term(term))
-                       return term->val.str;
-
-       return NULL;
+       return add_event(list, &data->idx, &attr,
+                        get_config_name(head_config), &config_terms);
 }
 
 int parse_events_add_pmu(struct parse_events_evlist *data,
@@ -1024,7 +1207,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
                return -EINVAL;
 
        evsel = __add_event(list, &data->idx, &attr,
-                           pmu_event_name(head_config), pmu->cpus,
+                           get_config_name(head_config), pmu->cpus,
                            &config_terms);
        if (evsel) {
                evsel->unit = info.unit;
@@ -1386,8 +1569,7 @@ int parse_events_terms(struct list_head *terms, const char *str)
                return 0;
        }
 
-       if (data.terms)
-               parse_events__free_terms(data.terms);
+       parse_events_terms__delete(data.terms);
        return ret;
 }
 
@@ -1395,9 +1577,10 @@ int parse_events(struct perf_evlist *evlist, const char *str,
                 struct parse_events_error *err)
 {
        struct parse_events_evlist data = {
-               .list  = LIST_HEAD_INIT(data.list),
-               .idx   = evlist->nr_entries,
-               .error = err,
+               .list   = LIST_HEAD_INIT(data.list),
+               .idx    = evlist->nr_entries,
+               .error  = err,
+               .evlist = evlist,
        };
        int ret;
 
@@ -2068,12 +2251,29 @@ int parse_events_term__clone(struct parse_events_term **new,
                        term->err_term, term->err_val);
 }
 
-void parse_events__free_terms(struct list_head *terms)
+void parse_events_terms__purge(struct list_head *terms)
 {
        struct parse_events_term *term, *h;
 
-       list_for_each_entry_safe(term, h, terms, list)
+       list_for_each_entry_safe(term, h, terms, list) {
+               if (term->array.nr_ranges)
+                       free(term->array.ranges);
+               list_del_init(&term->list);
                free(term);
+       }
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+       if (!terms)
+               return;
+       parse_events_terms__purge(terms);
+       free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+       free(a->ranges);
 }
 
 void parse_events_evlist_error(struct parse_events_evlist *data,
@@ -2088,6 +2288,33 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
        WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
 }
 
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+       int i;
+       bool first = true;
+
+       buf[0] = '\0';
+       for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+               const char *name = config_term_names[i];
+
+               if (!config_term_avail(i, NULL))
+                       continue;
+               if (!name)
+                       continue;
+               if (name[0] == '<')
+                       continue;
+
+               if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+                       return;
+
+               if (!first)
+                       strcat(buf, ",");
+               else
+                       first = false;
+               strcat(buf, name);
+       }
+}
+
 /*
  * Return a string containing the valid config terms of an event.
  * @additional_terms: For terms such as PMU sysfs terms.
@@ -2095,17 +2322,18 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
 char *parse_events_formats_error_string(char *additional_terms)
 {
        char *str;
-       static const char *static_terms = "config,config1,config2,name,"
-                                         "period,freq,branch_type,time,"
-                                         "call-graph,stack-size\n";
+       /* "branch_type" is the longest name */
+       char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+                         (sizeof("branch_type") - 1)];
 
+       config_terms_list(static_terms, sizeof(static_terms));
        /* valid terms */
        if (additional_terms) {
-               if (!asprintf(&str, "valid terms: %s,%s",
-                             additional_terms, static_terms))
+               if (asprintf(&str, "valid terms: %s,%s",
+                            additional_terms, static_terms) < 0)
                        goto fail;
        } else {
-               if (!asprintf(&str, "valid terms: %s", static_terms))
+               if (asprintf(&str, "valid terms: %s", static_terms) < 0)
                        goto fail;
        }
        return str;
index f1a6db107241b1c8ffaf03a3514ba1549df6fd1b..67e493088e81c29c17819e94b37da4f83fae073f 100644 (file)
@@ -68,11 +68,21 @@ enum {
        PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
        PARSE_EVENTS__TERM_TYPE_STACKSIZE,
        PARSE_EVENTS__TERM_TYPE_NOINHERIT,
-       PARSE_EVENTS__TERM_TYPE_INHERIT
+       PARSE_EVENTS__TERM_TYPE_INHERIT,
+       __PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+       size_t nr_ranges;
+       struct {
+               unsigned int start;
+               size_t length;
+       } *ranges;
 };
 
 struct parse_events_term {
        char *config;
+       struct parse_events_array array;
        union {
                char *str;
                u64  num;
@@ -98,12 +108,14 @@ struct parse_events_evlist {
        int                        idx;
        int                        nr_groups;
        struct parse_events_error *error;
+       struct perf_evlist        *evlist;
 };
 
 struct parse_events_terms {
        struct list_head *terms;
 };
 
+void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
                           int type_term, char *config, u64 num,
@@ -115,7 +127,9 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
                              char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term);
-void parse_events__free_terms(struct list_head *terms);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
@@ -126,18 +140,22 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 int parse_events_load_bpf(struct parse_events_evlist *data,
                          struct list_head *list,
                          char *bpf_file_name,
-                         bool source);
+                         bool source,
+                         struct list_head *head_config);
 /* Provide this function for perf test */
 struct bpf_object;
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
                              struct list_head *list,
-                             struct bpf_object *obj);
+                             struct bpf_object *obj,
+                             struct list_head *head_config);
 int parse_events_add_numeric(struct parse_events_evlist *data,
                             struct list_head *list,
                             u32 type, u64 config,
                             struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
-                          char *type, char *op_result1, char *op_result2);
+                          char *type, char *op_result1, char *op_result2,
+                          struct parse_events_error *error,
+                          struct list_head *head_config);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
                                void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_evlist *data,
index 58c5831ffd5c22133f48a4c1a3a07721c71362fa..1477fbc78993c7b31d7bb1531f630cca8587d93f 100644 (file)
@@ -9,8 +9,8 @@
 %{
 #include <errno.h>
 #include "../perf.h"
-#include "parse-events-bison.h"
 #include "parse-events.h"
+#include "parse-events-bison.h"
 
 char *parse_events_get_text(yyscan_t yyscanner);
 YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
@@ -111,6 +111,7 @@ do {                                                        \
 %x mem
 %s config
 %x event
+%x array
 
 group          [^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu      [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -122,7 +123,7 @@ num_dec             [0-9]+
 num_hex                0x[a-fA-F0-9]+
 num_raw_hex    [a-fA-F0-9]+
 name           [a-zA-Z_*?][a-zA-Z0-9_*?.]*
-name_minus     [a-zA-Z_*?][a-zA-Z0-9\-_*?.]*
+name_minus     [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
 /* If you add a modifier you need to update check_modifier() */
 modifier_event [ukhpPGHSDI]+
 modifier_bp    [rwx]{1,3}
@@ -176,10 +177,17 @@ modifier_bp       [rwx]{1,3}
 
 }
 
+<array>{
+"]"                    { BEGIN(config); return ']'; }
+{num_dec}              { return value(yyscanner, 10); }
+{num_hex}              { return value(yyscanner, 16); }
+,                      { return ','; }
+"\.\.\."               { return PE_ARRAY_RANGE; }
+}
+
 <config>{
        /*
-        * Please update parse_events_formats_error_string any time
-        * new static term is added.
+        * Please update config_term_names when new static term is added.
         */
 config                 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1                        { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
@@ -196,6 +204,8 @@ no-inherit          { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 ,                      { return ','; }
 "/"                    { BEGIN(INITIAL); return '/'; }
 {name_minus}           { return str(yyscanner, PE_NAME); }
+\[all\]                        { return PE_ARRAY_ALL; }
+"["                    { BEGIN(array); return '['; }
 }
 
 <mem>{
@@ -238,6 +248,7 @@ cpu-migrations|migrations                   { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU
 alignment-faults                               { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
 emulation-faults                               { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 dummy                                          { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output                                     { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
 
        /*
         * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
index ad379968d4c10c0fb7bb2ddc3bacce3dc1166f43..5be4a5f216d6d23889e07e225810fec40df67c30 100644 (file)
@@ -28,7 +28,7 @@ do { \
        INIT_LIST_HEAD(list);         \
 } while (0)
 
-static inc_group_count(struct list_head *list,
+static void inc_group_count(struct list_head *list,
                       struct parse_events_evlist *data)
 {
        /* Count groups only have more than 1 members */
@@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list,
 %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
 %token PE_ERROR
 %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %type <num> PE_VALUE
 %type <num> PE_VALUE_SYM_HW
 %type <num> PE_VALUE_SYM_SW
@@ -64,6 +65,7 @@ static inc_group_count(struct list_head *list,
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
 %type <num> value_sym
 %type <head> event_config
+%type <head> opt_event_config
 %type <term> event_term
 %type <head> event_pmu
 %type <head> event_legacy_symbol
@@ -82,6 +84,9 @@ static inc_group_count(struct list_head *list,
 %type <head> group_def
 %type <head> group
 %type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
 
 %union
 {
@@ -93,6 +98,7 @@ static inc_group_count(struct list_head *list,
                char *sys;
                char *event;
        } tracepoint_name;
+       struct parse_events_array array;
 }
 %%
 
@@ -211,24 +217,14 @@ event_def: event_pmu |
           event_bpf_file
 
 event_pmu:
-PE_NAME '/' event_config '/'
+PE_NAME opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
-       parse_events__free_terms($3);
-       $$ = list;
-}
-|
-PE_NAME '/' '/'
-{
-       struct parse_events_evlist *data = _data;
-       struct list_head *list;
-
-       ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
+       ABORT_ON(parse_events_add_pmu(data, list, $1, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 |
@@ -246,7 +242,7 @@ PE_KERNEL_PMU_EVENT sep_dc
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-       parse_events__free_terms(head);
+       parse_events_terms__delete(head);
        $$ = list;
 }
 |
@@ -266,7 +262,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
-       parse_events__free_terms(head);
+       parse_events_terms__delete(head);
        $$ = list;
 }
 
@@ -285,7 +281,7 @@ value_sym '/' event_config '/'
 
        ALLOC_LIST(list);
        ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
-       parse_events__free_terms($3);
+       parse_events_terms__delete($3);
        $$ = list;
 }
 |
@@ -302,33 +298,39 @@ value_sym sep_slash_dc
 }
 
 event_legacy_cache:
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6));
+       parse_events_terms__delete($6);
        $$ = list;
 }
 |
-PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4));
+       parse_events_terms__delete($4);
        $$ = list;
 }
 |
-PE_NAME_CACHE_TYPE
+PE_NAME_CACHE_TYPE opt_event_config
 {
        struct parse_events_evlist *data = _data;
+       struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL));
+       ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
@@ -378,24 +380,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
 }
 
 event_legacy_tracepoint:
-tracepoint_name
-{
-       struct parse_events_evlist *data = _data;
-       struct parse_events_error *error = data->error;
-       struct list_head *list;
-
-       ALLOC_LIST(list);
-       if (error)
-               error->idx = @1.first_column;
-
-       if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-                                       error, NULL))
-               return -1;
-
-       $$ = list;
-}
-|
-tracepoint_name '/' event_config '/'
+tracepoint_name opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct parse_events_error *error = data->error;
@@ -406,7 +391,7 @@ tracepoint_name '/' event_config '/'
                error->idx = @1.first_column;
 
        if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event,
-                                       error, $3))
+                                       error, $2))
                return -1;
 
        $$ = list;
@@ -433,49 +418,68 @@ PE_NAME ':' PE_NAME
 }
 
 event_legacy_numeric:
-PE_VALUE ':' PE_VALUE
+PE_VALUE ':' PE_VALUE opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4));
+       parse_events_terms__delete($4);
        $$ = list;
 }
 
 event_legacy_raw:
-PE_RAW
+PE_RAW opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
+       ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
 event_bpf_file:
-PE_BPF_OBJECT
+PE_BPF_OBJECT opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct parse_events_error *error = data->error;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_load_bpf(data, list, $1, false));
+       ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 |
-PE_BPF_SOURCE
+PE_BPF_SOURCE opt_event_config
 {
        struct parse_events_evlist *data = _data;
        struct list_head *list;
 
        ALLOC_LIST(list);
-       ABORT_ON(parse_events_load_bpf(data, list, $1, true));
+       ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2));
+       parse_events_terms__delete($2);
        $$ = list;
 }
 
+opt_event_config:
+'/' event_config '/'
+{
+       $$ = $2;
+}
+|
+'/' '/'
+{
+       $$ = NULL;
+}
+|
+{
+       $$ = NULL;
+}
+
 start_terms: event_config
 {
        struct parse_events_terms *data = _data;
@@ -573,6 +577,86 @@ PE_TERM
        ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
        $$ = term;
 }
+|
+PE_NAME array '=' PE_NAME
+{
+       struct parse_events_term *term;
+       int i;
+
+       ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                       $1, $4, &@1, &@4));
+
+       term->array = $2;
+       $$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+       struct parse_events_term *term;
+
+       ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                       $1, $4, &@1, &@4));
+       term->array = $2;
+       $$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+       $$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+       $$.nr_ranges = 0;
+       $$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+       struct parse_events_array new_array;
+
+       new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+       new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+                                 new_array.nr_ranges);
+       ABORT_ON(!new_array.ranges);
+       memcpy(&new_array.ranges[0], $1.ranges,
+              $1.nr_ranges * sizeof(new_array.ranges[0]));
+       memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+              $3.nr_ranges * sizeof(new_array.ranges[0]));
+       free($1.ranges);
+       free($3.ranges);
+       $$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+       struct parse_events_array array;
+
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+       ABORT_ON(!array.ranges);
+       array.ranges[0].start = $1;
+       array.ranges[0].length = 1;
+       $$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+       struct parse_events_array array;
+
+       ABORT_ON($3 < $1);
+       array.nr_ranges = 1;
+       array.ranges = malloc(sizeof(array.ranges[0]));
+       ABORT_ON(!array.ranges);
+       array.ranges[0].start = $1;
+       array.ranges[0].length = $3 - $1 + 1;
+       $$ = array;
+}
 
 sep_dc: ':' |
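Editor's note (hedged, not part of the patch): with opt_event_config in place, every event form above (raw, numeric type:config, legacy cache, tracepoint, BPF object/source) may carry an optional /term,term,.../ section, and the new array rules allow subscripts on the left-hand side of a term, e.g. for configuring BPF map slots. The strings below are illustrative only; the exact accepted spellings depend on the rest of the series:

/* Illustrative specs the reworked grammar is meant to accept (sketch only): */
static const char * const example_specs[] = {
	"r1234/name=rawev/",                      /* PE_RAW + opt_event_config          */
	"4:0x6530160/name=numev/",                /* legacy numeric + opt_event_config  */
	"L1-dcache-load-misses/name=cacheev/",    /* legacy cache event now takes terms */
	"sched:sched_switch/call-graph=fp/",      /* tracepoint + opt_event_config      */
	"./prog.bpf.c/map:flags.value[0...3]=1/", /* array term inside a BPF config     */
};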
 
index b597bcc8fc781f4fa2c631044bddaab447b756e9..adef23b1352e836fec9f0f9a5290c578bb25cc43 100644 (file)
@@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
        char scale[128];
        int fd, ret = -1;
        char path[PATH_MAX];
-       const char *lc;
+       char *lc;
 
        snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
 
@@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
         */
        lc = setlocale(LC_NUMERIC, NULL);
 
+       /*
+        * The lc string may be allocated in static storage,
+        * so get a dynamic copy to make it survive setlocale
+        * call below.
+        */
+       lc = strdup(lc);
+       if (!lc) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
        /*
         * force to C locale to ensure kernel
         * scale string is converted correctly.
@@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
        /* restore locale */
        setlocale(LC_NUMERIC, lc);
 
+       free(lc);
+
        ret = 0;
 error:
        close(fd);
@@ -153,7 +166,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
        if (fd == -1)
                return -1;
 
-               sret = read(fd, alias->unit, UNIT_MAX_LEN);
+       sret = read(fd, alias->unit, UNIT_MAX_LEN);
        if (sret < 0)
                goto error;
 
@@ -284,13 +297,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 {
        struct dirent *evt_ent;
        DIR *event_dir;
-       int ret = 0;
 
        event_dir = opendir(dir);
        if (!event_dir)
                return -EINVAL;
 
-       while (!ret && (evt_ent = readdir(event_dir))) {
+       while ((evt_ent = readdir(event_dir))) {
                char path[PATH_MAX];
                char *name = evt_ent->d_name;
                FILE *file;
@@ -306,17 +318,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head)
 
                snprintf(path, PATH_MAX, "%s/%s", dir, name);
 
-               ret = -EINVAL;
                file = fopen(path, "r");
-               if (!file)
-                       break;
+               if (!file) {
+                       pr_debug("Cannot open %s\n", path);
+                       continue;
+               }
 
-               ret = perf_pmu__new_alias(head, dir, name, file);
+               if (perf_pmu__new_alias(head, dir, name, file) < 0)
+                       pr_debug("Cannot set up %s\n", name);
                fclose(file);
        }
 
        closedir(event_dir);
-       return ret;
+       return 0;
 }
 
 /*
@@ -354,7 +368,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
        list_for_each_entry(term, &alias->terms, list) {
                ret = parse_events_term__clone(&cloned, term);
                if (ret) {
-                       parse_events__free_terms(&list);
+                       parse_events_terms__purge(&list);
                        return ret;
                }
                list_add_tail(&cloned->list, &list);
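Editor's sketch (not part of the patch): setlocale(LC_NUMERIC, NULL) may return a pointer into static storage that the next setlocale() call overwrites, which is why perf_pmu__parse_scale() now strdup()s it before forcing the "C" locale and restores it afterwards. The same pattern in a stand-alone form; the function name is illustrative:

#include <locale.h>
#include <stdlib.h>
#include <string.h>

/* Minimal sketch of the save/duplicate/restore pattern used above. */
static double parse_in_c_locale(const char *s)
{
	const char *cur = setlocale(LC_NUMERIC, NULL);
	char *saved = cur ? strdup(cur) : NULL; /* copy: the next call may clobber it */
	double v;

	if (!saved)
		return -1.0;
	setlocale(LC_NUMERIC, "C");   /* kernel sysfs scale strings always use '.' */
	v = strtod(s, NULL);
	setlocale(LC_NUMERIC, saved); /* restore the caller's locale */
	free(saved);
	return v;
}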
index 544509c159cec4111e5865007b513e1197c576c4..b3aabc0d4eb0096fff41fb078a09d77988f103ff 100644 (file)
@@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event,
                                 const char *ev_name,
                                 struct print_arg *args)
 {
+       if (args == NULL)
+               return;
+
        switch (args->type) {
        case PRINT_NULL:
                break;
index d72fafc1c800db1a699ac37e4ecb4acd0f1e3326..fbd05242b4e59786ca0e081a52729248d780f5a0 100644 (file)
@@ -205,6 +205,9 @@ static void define_event_symbols(struct event_format *event,
                                 const char *ev_name,
                                 struct print_arg *args)
 {
+       if (args == NULL)
+               return;
+
        switch (args->type) {
        case PRINT_NULL:
                break;
@@ -1091,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv)
                goto error;
        }
 
-       free(command_line);
-
        set_table_handlers(tables);
 
        if (tables->db_export_mode) {
@@ -1101,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv)
                        goto error;
        }
 
+       free(command_line);
+
        return err;
 error:
        Py_Finalize();
index 40b7a0d0905b8d7f01972c6e38e225f108b15133..60b3593d210dbc3b52997a693bcc8f9645687f08 100644 (file)
@@ -240,14 +240,6 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-static int process_build_id_stub(struct perf_tool *tool __maybe_unused,
-                                union perf_event *event __maybe_unused,
-                                struct perf_session *session __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
 static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
                                       union perf_event *event __maybe_unused,
                                       struct ordered_events *oe __maybe_unused)
@@ -260,23 +252,6 @@ static int process_finished_round(struct perf_tool *tool,
                                  union perf_event *event,
                                  struct ordered_events *oe);
 
-static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
-                                union perf_event *event __maybe_unused,
-                                struct perf_session *perf_session
-                                __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
-static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
-                               union perf_event *event __maybe_unused,
-                               struct perf_session *session __maybe_unused)
-{
-       dump_printf(": unhandled!\n");
-       return 0;
-}
-
 static int skipn(int fd, off_t n)
 {
        char buf[4096];
@@ -303,10 +278,9 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
        return event->auxtrace.size;
 }
 
-static
-int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
-                                     union perf_event *event __maybe_unused,
-                                     struct perf_session *session __maybe_unused)
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+                                 union perf_event *event __maybe_unused,
+                                 struct perf_session *session __maybe_unused)
 {
        dump_printf(": unhandled!\n");
        return 0;
@@ -410,7 +384,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
        if (tool->tracing_data == NULL)
                tool->tracing_data = process_event_synth_tracing_data_stub;
        if (tool->build_id == NULL)
-               tool->build_id = process_build_id_stub;
+               tool->build_id = process_event_op2_stub;
        if (tool->finished_round == NULL) {
                if (tool->ordered_events)
                        tool->finished_round = process_finished_round;
@@ -418,13 +392,13 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
                        tool->finished_round = process_finished_round_stub;
        }
        if (tool->id_index == NULL)
-               tool->id_index = process_id_index_stub;
+               tool->id_index = process_event_op2_stub;
        if (tool->auxtrace_info == NULL)
-               tool->auxtrace_info = process_event_auxtrace_info_stub;
+               tool->auxtrace_info = process_event_op2_stub;
        if (tool->auxtrace == NULL)
                tool->auxtrace = process_event_auxtrace_stub;
        if (tool->auxtrace_error == NULL)
-               tool->auxtrace_error = process_event_auxtrace_error_stub;
+               tool->auxtrace_error = process_event_op2_stub;
        if (tool->thread_map == NULL)
                tool->thread_map = process_event_thread_map_stub;
        if (tool->cpu_map == NULL)
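Editor's sketch (not part of the patch): the removed per-type stubs had identical bodies, so a single process_event_op2_stub now backs build_id, id_index, auxtrace_info and auxtrace_error. The same default-wiring idea with a hypothetical callback table (names are illustrative, not perf's):

#include <stdio.h>

struct tool_cbs {                       /* hypothetical, simplified */
	int (*build_id)(void *ev);
	int (*id_index)(void *ev);
	int (*auxtrace_info)(void *ev);
};

static int op2_stub(void *ev)
{
	(void)ev;
	printf(": unhandled!\n");
	return 0;
}

static void fill_defaults(struct tool_cbs *t)
{
	/* one generic stub replaces several identical per-field stubs */
	if (!t->build_id)
		t->build_id = op2_stub;
	if (!t->id_index)
		t->id_index = op2_stub;
	if (!t->auxtrace_info)
		t->auxtrace_info = op2_stub;
}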
index 1833103768cb99fbdbb92ea8910b08488fd2aac3..c8680984d2d6680f56a974a1b54056a5e74263e1 100644 (file)
@@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
 cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
 
+src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
 libtraceevent = getenv('LIBTRACEEVENT')
@@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI')
 ext_sources = [f.strip() for f in file('util/python-ext-sources')
                                if len(f.strip()) > 0 and f[0] != '#']
 
+# use full paths with source files
+ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)
+
 perf = Extension('perf',
                  sources = ext_sources,
                  include_dirs = ['util/include'],
index ec722346e6ffb8dd6531e94576bb15b548a1487b..93fa136b0025c02e1e4383be523fcf37c092f36e 100644 (file)
@@ -6,6 +6,7 @@
 #include "evsel.h"
 #include "evlist.h"
 #include <traceevent/event-parse.h>
+#include "mem-events.h"
 
 regex_t                parent_regex;
 const char     default_parent_pattern[] = "^sys_|^do_page_fault";
@@ -25,9 +26,19 @@ int          sort__has_parent = 0;
 int            sort__has_sym = 0;
 int            sort__has_dso = 0;
 int            sort__has_socket = 0;
+int            sort__has_thread = 0;
+int            sort__has_comm = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
 
-
+/*
+ * Replaces all occurrences of the character used with the:
+ *
+ * -t, --field-separator
+ *
+ * option, which uses a special separator character and does not pad with
+ * spaces, replacing all occurrences of this separator in symbol names (and
+ * other output) with a '.' character, so that it is the only invalid
+ * separator.
+ */
 static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
 {
        int n;
@@ -80,10 +91,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
                               width, width, comm ?: "");
 }
 
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const struct thread *th = arg;
+
+       if (type != HIST_FILTER__THREAD)
+               return -1;
+
+       return th && he->thread != th;
+}
+
 struct sort_entry sort_thread = {
        .se_header      = "  Pid:Command",
        .se_cmp         = sort__thread_cmp,
        .se_snprintf    = hist_entry__thread_snprintf,
+       .se_filter      = hist_entry__thread_filter,
        .se_width_idx   = HISTC_THREAD,
 };
 
@@ -121,6 +143,7 @@ struct sort_entry sort_comm = {
        .se_collapse    = sort__comm_collapse,
        .se_sort        = sort__comm_sort,
        .se_snprintf    = hist_entry__comm_snprintf,
+       .se_filter      = hist_entry__thread_filter,
        .se_width_idx   = HISTC_COMM,
 };
 
@@ -170,10 +193,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
        return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
 }
 
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
 struct sort_entry sort_dso = {
        .se_header      = "Shared Object",
        .se_cmp         = sort__dso_cmp,
        .se_snprintf    = hist_entry__dso_snprintf,
+       .se_filter      = hist_entry__dso_filter,
        .se_width_idx   = HISTC_DSO,
 };
 
@@ -246,10 +280,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
                        ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
                        ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
                                        ip - map->unmap_ip(map, sym->start));
-                       ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-                                      width - ret, "");
                } else {
-                       ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
+                       ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
                                               width - ret,
                                               sym->name);
                }
@@ -257,14 +289,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
                size_t len = BITS_PER_LONG / 4;
                ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
                                       len, ip);
-               ret += repsep_snprintf(bf + ret, size - ret, "%-*s",
-                                      width - ret, "");
        }
 
-       if (ret > width)
-               bf[width] = '\0';
-
-       return width;
+       return ret;
 }
 
 static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
@@ -274,46 +301,56 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
                                         he->level, bf, size, width);
 }
 
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
 struct sort_entry sort_sym = {
        .se_header      = "Symbol",
        .se_cmp         = sort__sym_cmp,
        .se_sort        = sort__sym_sort,
        .se_snprintf    = hist_entry__sym_snprintf,
+       .se_filter      = hist_entry__sym_filter,
        .se_width_idx   = HISTC_SYMBOL,
 };
 
 /* --sort srcline */
 
+static char *hist_entry__get_srcline(struct hist_entry *he)
+{
+       struct map *map = he->ms.map;
+
+       if (!map)
+               return SRCLINE_UNKNOWN;
+
+       return get_srcline(map->dso, map__rip_2objdump(map, he->ip),
+                          he->ms.sym, true);
+}
+
 static int64_t
 sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       if (!left->srcline) {
-               if (!left->ms.map)
-                       left->srcline = SRCLINE_UNKNOWN;
-               else {
-                       struct map *map = left->ms.map;
-                       left->srcline = get_srcline(map->dso,
-                                          map__rip_2objdump(map, left->ip),
-                                                   left->ms.sym, true);
-               }
-       }
-       if (!right->srcline) {
-               if (!right->ms.map)
-                       right->srcline = SRCLINE_UNKNOWN;
-               else {
-                       struct map *map = right->ms.map;
-                       right->srcline = get_srcline(map->dso,
-                                            map__rip_2objdump(map, right->ip),
-                                                    right->ms.sym, true);
-               }
-       }
+       if (!left->srcline)
+               left->srcline = hist_entry__get_srcline(left);
+       if (!right->srcline)
+               right->srcline = hist_entry__get_srcline(right);
+
        return strcmp(right->srcline, left->srcline);
 }
 
 static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline);
+       if (!he->srcline)
+               he->srcline = hist_entry__get_srcline(he);
+
+       return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
 }
 
 struct sort_entry sort_srcline = {
@@ -327,11 +364,14 @@ struct sort_entry sort_srcline = {
 
 static char no_srcfile[1];
 
-static char *get_srcfile(struct hist_entry *e)
+static char *hist_entry__get_srcfile(struct hist_entry *e)
 {
        char *sf, *p;
        struct map *map = e->ms.map;
 
+       if (!map)
+               return no_srcfile;
+
        sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
                         e->ms.sym, false, true);
        if (!strcmp(sf, SRCLINE_UNKNOWN))
@@ -348,25 +388,21 @@ static char *get_srcfile(struct hist_entry *e)
 static int64_t
 sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
 {
-       if (!left->srcfile) {
-               if (!left->ms.map)
-                       left->srcfile = no_srcfile;
-               else
-                       left->srcfile = get_srcfile(left);
-       }
-       if (!right->srcfile) {
-               if (!right->ms.map)
-                       right->srcfile = no_srcfile;
-               else
-                       right->srcfile = get_srcfile(right);
-       }
+       if (!left->srcfile)
+               left->srcfile = hist_entry__get_srcfile(left);
+       if (!right->srcfile)
+               right->srcfile = hist_entry__get_srcfile(right);
+
        return strcmp(right->srcfile, left->srcfile);
 }
 
 static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
                                        size_t size, unsigned int width)
 {
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile);
+       if (!he->srcfile)
+               he->srcfile = hist_entry__get_srcfile(he);
+
+       return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
 }
 
 struct sort_entry sort_srcfile = {
@@ -439,10 +475,21 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
        return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
 }
 
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+       int sk = *(const int *)arg;
+
+       if (type != HIST_FILTER__SOCKET)
+               return -1;
+
+       return sk >= 0 && he->socket != sk;
+}
+
 struct sort_entry sort_socket = {
        .se_header      = "Socket",
        .se_cmp         = sort__socket_cmp,
        .se_snprintf    = hist_entry__socket_snprintf,
+       .se_filter      = hist_entry__socket_filter,
        .se_width_idx   = HISTC_SOCKET,
 };
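Editor's sketch (not part of the patch): each sort entry can now carry an se_filter callback with a common contract: return -1 when the filter type is not the one this column handles, 0 to keep the entry, non-zero to filter it out. A self-contained example of that contract with simplified types; the struct and enum names are illustrative, not perf's:

struct entry { int socket; };                      /* simplified stand-in      */
enum { FILTER__SOCKET = 1, FILTER__OTHER = 2 };    /* illustrative filter types */

static int entry__socket_filter(struct entry *e, int type, const void *arg)
{
	int sk = *(const int *)arg;

	if (type != FILTER__SOCKET)        /* not this column's filter type: no opinion */
		return -1;

	return sk >= 0 && e->socket != sk; /* non-zero means "filter the entry out" */
}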
 
@@ -483,9 +530,6 @@ sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
        if (right->trace_output == NULL)
                right->trace_output = get_trace_output(right);
 
-       hists__new_col_len(left->hists, HISTC_TRACE, strlen(left->trace_output));
-       hists__new_col_len(right->hists, HISTC_TRACE, strlen(right->trace_output));
-
        return strcmp(right->trace_output, left->trace_output);
 }
 
@@ -496,11 +540,11 @@ static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
 
        evsel = hists_to_evsel(he->hists);
        if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
-               return scnprintf(bf, size, "%-*.*s", width, width, "N/A");
+               return scnprintf(bf, size, "%-.*s", width, "N/A");
 
        if (he->trace_output == NULL)
                he->trace_output = get_trace_output(he);
-       return repsep_snprintf(bf, size, "%-*.*s", width, width, he->trace_output);
+       return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
 }
 
 struct sort_entry sort_trace = {
@@ -532,6 +576,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
                return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->branch_info || !he->branch_info->from.map ||
+                      he->branch_info->from.map->dso != dso);
+}
+
 static int64_t
 sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -552,6 +608,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
                return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+                                    const void *arg)
+{
+       const struct dso *dso = arg;
+
+       if (type != HIST_FILTER__DSO)
+               return -1;
+
+       return dso && (!he->branch_info || !he->branch_info->to.map ||
+                      he->branch_info->to.map->dso != dso);
+}
+
 static int64_t
 sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -613,10 +681,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
        return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
 }
 
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && !(he->branch_info && he->branch_info->from.sym &&
+                       strstr(he->branch_info->from.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+                                      const void *arg)
+{
+       const char *sym = arg;
+
+       if (type != HIST_FILTER__SYMBOL)
+               return -1;
+
+       return sym && !(he->branch_info && he->branch_info->to.sym &&
+                       strstr(he->branch_info->to.sym->name, sym));
+}
+
 struct sort_entry sort_dso_from = {
        .se_header      = "Source Shared Object",
        .se_cmp         = sort__dso_from_cmp,
        .se_snprintf    = hist_entry__dso_from_snprintf,
+       .se_filter      = hist_entry__dso_from_filter,
        .se_width_idx   = HISTC_DSO_FROM,
 };
 
@@ -624,6 +717,7 @@ struct sort_entry sort_dso_to = {
        .se_header      = "Target Shared Object",
        .se_cmp         = sort__dso_to_cmp,
        .se_snprintf    = hist_entry__dso_to_snprintf,
+       .se_filter      = hist_entry__dso_to_filter,
        .se_width_idx   = HISTC_DSO_TO,
 };
 
@@ -631,6 +725,7 @@ struct sort_entry sort_sym_from = {
        .se_header      = "Source Symbol",
        .se_cmp         = sort__sym_from_cmp,
        .se_snprintf    = hist_entry__sym_from_snprintf,
+       .se_filter      = hist_entry__sym_from_filter,
        .se_width_idx   = HISTC_SYMBOL_FROM,
 };
 
@@ -638,6 +733,7 @@ struct sort_entry sort_sym_to = {
        .se_header      = "Target Symbol",
        .se_cmp         = sort__sym_to_cmp,
        .se_snprintf    = hist_entry__sym_to_snprintf,
+       .se_filter      = hist_entry__sym_to_filter,
        .se_width_idx   = HISTC_SYMBOL_TO,
 };
 
@@ -797,20 +893,10 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
 static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
-       const char *out;
-       u64 mask = PERF_MEM_LOCK_NA;
+       char out[10];
 
-       if (he->mem_info)
-               mask = he->mem_info->data_src.mem_lock;
-
-       if (mask & PERF_MEM_LOCK_NA)
-               out = "N/A";
-       else if (mask & PERF_MEM_LOCK_LOCKED)
-               out = "Yes";
-       else
-               out = "No";
-
-       return repsep_snprintf(bf, size, "%-*s", width, out);
+       perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+       return repsep_snprintf(bf, size, "%.*s", width, out);
 }
 
 static int64_t
@@ -832,54 +918,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
 }
 
-static const char * const tlb_access[] = {
-       "N/A",
-       "HIT",
-       "MISS",
-       "L1",
-       "L2",
-       "Walker",
-       "Fault",
-};
-#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *))
-
 static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t l = 0, i;
-       u64 m = PERF_MEM_TLB_NA;
-       u64 hit, miss;
-
-       out[0] = '\0';
-
-       if (he->mem_info)
-               m = he->mem_info->data_src.mem_dtlb;
-
-       hit = m & PERF_MEM_TLB_HIT;
-       miss = m & PERF_MEM_TLB_MISS;
-
-       /* already taken care of */
-       m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
-
-       for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, tlb_access[i], sz - l);
-               l += strlen(tlb_access[i]);
-       }
-       if (*out == '\0')
-               strcpy(out, "N/A");
-       if (hit)
-               strncat(out, " hit", sz - l);
-       if (miss)
-               strncat(out, " miss", sz - l);
 
+       perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -902,61 +946,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
 }
 
-static const char * const mem_lvl[] = {
-       "N/A",
-       "HIT",
-       "MISS",
-       "L1",
-       "LFB",
-       "L2",
-       "L3",
-       "Local RAM",
-       "Remote RAM (1 hop)",
-       "Remote RAM (2 hops)",
-       "Remote Cache (1 hop)",
-       "Remote Cache (2 hops)",
-       "I/O",
-       "Uncached",
-};
-#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *))
-
 static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t i, l = 0;
-       u64 m =  PERF_MEM_LVL_NA;
-       u64 hit, miss;
-
-       if (he->mem_info)
-               m  = he->mem_info->data_src.mem_lvl;
-
-       out[0] = '\0';
-
-       hit = m & PERF_MEM_LVL_HIT;
-       miss = m & PERF_MEM_LVL_MISS;
-
-       /* already taken care of */
-       m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
-
-       for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, mem_lvl[i], sz - l);
-               l += strlen(mem_lvl[i]);
-       }
-       if (*out == '\0')
-               strcpy(out, "N/A");
-       if (hit)
-               strncat(out, " hit", sz - l);
-       if (miss)
-               strncat(out, " miss", sz - l);
 
+       perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
@@ -979,51 +974,15 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
        return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
 }
 
-static const char * const snoop_access[] = {
-       "N/A",
-       "None",
-       "Miss",
-       "Hit",
-       "HitM",
-};
-#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *))
-
 static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
        char out[64];
-       size_t sz = sizeof(out) - 1; /* -1 for null termination */
-       size_t i, l = 0;
-       u64 m = PERF_MEM_SNOOP_NA;
-
-       out[0] = '\0';
-
-       if (he->mem_info)
-               m = he->mem_info->data_src.mem_snoop;
-
-       for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) {
-               if (!(m & 0x1))
-                       continue;
-               if (l) {
-                       strcat(out, " or ");
-                       l += 4;
-               }
-               strncat(out, snoop_access[i], sz - l);
-               l += strlen(snoop_access[i]);
-       }
-
-       if (*out == '\0')
-               strcpy(out, "N/A");
 
+       perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
        return repsep_snprintf(bf, size, "%-*s", width, out);
 }
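Editor's sketch (not part of the patch): the open-coded bitmask decoders for TLB access, memory level, snoop and lock info are replaced by shared perf_mem__{tlb,lvl,snp,lck}_scnprintf() helpers (hence the new mem-events.h include above). The loop they centralise looks roughly like this; names[] and the buffer policy are illustrative:

#include <stdio.h>
#include <string.h>

/* Decode set flag bits into an " or "-joined string; assumes sz >= 1. */
static void flags_scnprintf(char *out, size_t sz, unsigned long long m,
			    const char * const *names, size_t n)
{
	size_t i, left = sz - 1;      /* room left, reserving space for '\0' */

	out[0] = '\0';
	for (i = 0; m && i < n; i++, m >>= 1) {
		if (!(m & 0x1))
			continue;
		if (out[0]) {
			strncat(out, " or ", left);
			left = sz - 1 - strlen(out);
		}
		strncat(out, names[i], left);
		left = sz - 1 - strlen(out);
	}
	if (!out[0])
		strncat(out, "N/A", left);
}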
 
-static inline  u64 cl_address(u64 address)
-{
-       /* return the cacheline of the address */
-       return (address & ~(cacheline_size - 1));
-}
-
 static int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1440,20 +1399,6 @@ struct hpp_sort_entry {
        struct sort_entry *se;
 };
 
-bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
-{
-       struct hpp_sort_entry *hse_a;
-       struct hpp_sort_entry *hse_b;
-
-       if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
-               return false;
-
-       hse_a = container_of(a, struct hpp_sort_entry, hpp);
-       hse_b = container_of(b, struct hpp_sort_entry, hpp);
-
-       return hse_a->se == hse_b->se;
-}
-
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
 {
        struct hpp_sort_entry *hse;
@@ -1539,8 +1484,56 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
        return sort_fn(a, b);
 }
 
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+       return format->header == __sort__hpp_header;
+}
+
+#define MK_SORT_ENTRY_CHK(key)                                 \
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)  \
+{                                                              \
+       struct hpp_sort_entry *hse;                             \
+                                                               \
+       if (!perf_hpp__is_sort_entry(fmt))                      \
+               return false;                                   \
+                                                               \
+       hse = container_of(fmt, struct hpp_sort_entry, hpp);    \
+       return hse->se == &sort_ ## key ;                       \
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       struct hpp_sort_entry *hse_a;
+       struct hpp_sort_entry *hse_b;
+
+       if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+               return false;
+
+       hse_a = container_of(a, struct hpp_sort_entry, hpp);
+       hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+       return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+       struct hpp_sort_entry *hse;
+
+       hse = container_of(fmt, struct hpp_sort_entry, hpp);
+       free(hse);
+}
+
 static struct hpp_sort_entry *
-__sort_dimension__alloc_hpp(struct sort_dimension *sd)
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
 {
        struct hpp_sort_entry *hse;
 
@@ -1560,40 +1553,92 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd)
        hse->hpp.cmp = __sort__hpp_cmp;
        hse->hpp.collapse = __sort__hpp_collapse;
        hse->hpp.sort = __sort__hpp_sort;
+       hse->hpp.equal = __sort__hpp_equal;
+       hse->hpp.free = hse_free;
 
        INIT_LIST_HEAD(&hse->hpp.list);
        INIT_LIST_HEAD(&hse->hpp.sort_list);
        hse->hpp.elide = false;
        hse->hpp.len = 0;
        hse->hpp.user_len = 0;
+       hse->hpp.level = level;
 
        return hse;
 }
 
-bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+static void hpp_free(struct perf_hpp_fmt *fmt)
 {
-       return format->header == __sort__hpp_header;
+       free(fmt);
+}
+
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+                                                      int level)
+{
+       struct perf_hpp_fmt *fmt;
+
+       fmt = memdup(hd->fmt, sizeof(*fmt));
+       if (fmt) {
+               INIT_LIST_HEAD(&fmt->list);
+               INIT_LIST_HEAD(&fmt->sort_list);
+               fmt->free = hpp_free;
+               fmt->level = level;
+       }
+
+       return fmt;
+}
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+       struct perf_hpp_fmt *fmt;
+       struct hpp_sort_entry *hse;
+       int ret = -1;
+       int r;
+
+       perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+               if (!perf_hpp__is_sort_entry(fmt))
+                       continue;
+
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               if (hse->se->se_filter == NULL)
+                       continue;
+
+               /*
+                * The hist entry is filtered if any sort key in the hpp list
+                * applies.  Non-matching filter types are skipped.
+                */
+               r = hse->se->se_filter(he, type, arg);
+               if (r >= 0) {
+                       if (ret < 0)
+                               ret = 0;
+                       ret |= r;
+               }
+       }
+
+       return ret;
 }
 
-static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+                                         struct perf_hpp_list *list,
+                                         int level)
 {
-       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
 
        if (hse == NULL)
                return -1;
 
-       perf_hpp__register_sort_field(&hse->hpp);
+       perf_hpp_list__register_sort_field(list, &hse->hpp);
        return 0;
 }
 
-static int __sort_dimension__add_hpp_output(struct sort_dimension *sd)
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+                                           struct perf_hpp_list *list)
 {
-       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd);
+       struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
 
        if (hse == NULL)
                return -1;
 
-       perf_hpp__column_register(&hse->hpp);
+       perf_hpp_list__column_register(list, &hse->hpp);
        return 0;
 }
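Editor's sketch (not part of the patch): hist_entry__filter(), added above, walks the entry's hpp format list and ORs together the verdicts of every se_filter whose filter type matches; -1 means no sort key at this level handles that type. A hedged caller, with the wrapper name being illustrative and the declarations assumed to come from util/hist.h:

#include <stdbool.h>
#include "util/hist.h"   /* assumed location of the hist_entry__filter() prototype */

static bool entry_is_filtered(struct hist_entry *he, int type, const void *arg)
{
	int ret = hist_entry__filter(he, type, arg);

	if (ret < 0)
		return false;   /* filter type not handled at this level */
	return ret != 0;        /* non-zero: some matching key voted "filter out" */
}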
 
@@ -1727,6 +1772,9 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
        if (hde->raw_trace)
                goto raw_field;
 
+       if (!he->trace_output)
+               he->trace_output = get_trace_output(he);
+
        field = hde->field;
        namelen = strlen(field->name);
        str = he->trace_output;
@@ -1776,6 +1824,11 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 
        hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
 
+       if (b == NULL) {
+               update_dynamic_len(hde, a);
+               return 0;
+       }
+
        field = hde->field;
        if (field->flags & FIELD_IS_DYNAMIC) {
                unsigned long long dyn;
@@ -1790,9 +1843,6 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
        } else {
                offset = field->offset;
                size = field->size;
-
-               update_dynamic_len(hde, a);
-               update_dynamic_len(hde, b);
        }
 
        return memcmp(a->raw_data + offset, b->raw_data + offset, size);
@@ -1803,8 +1853,31 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
        return fmt->cmp == __sort__hde_cmp;
 }
 
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+       struct hpp_dynamic_entry *hde_a;
+       struct hpp_dynamic_entry *hde_b;
+
+       if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+               return false;
+
+       hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+       hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+       return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+       struct hpp_dynamic_entry *hde;
+
+       hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+       free(hde);
+}
+
 static struct hpp_dynamic_entry *
-__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+                     int level)
 {
        struct hpp_dynamic_entry *hde;
 
@@ -1827,16 +1900,47 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field)
        hde->hpp.cmp = __sort__hde_cmp;
        hde->hpp.collapse = __sort__hde_cmp;
        hde->hpp.sort = __sort__hde_cmp;
+       hde->hpp.equal = __sort__hde_equal;
+       hde->hpp.free = hde_free;
 
        INIT_LIST_HEAD(&hde->hpp.list);
        INIT_LIST_HEAD(&hde->hpp.sort_list);
        hde->hpp.elide = false;
        hde->hpp.len = 0;
        hde->hpp.user_len = 0;
+       hde->hpp.level = level;
 
        return hde;
 }
 
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+       struct perf_hpp_fmt *new_fmt = NULL;
+
+       if (perf_hpp__is_sort_entry(fmt)) {
+               struct hpp_sort_entry *hse, *new_hse;
+
+               hse = container_of(fmt, struct hpp_sort_entry, hpp);
+               new_hse = memdup(hse, sizeof(*hse));
+               if (new_hse)
+                       new_fmt = &new_hse->hpp;
+       } else if (perf_hpp__is_dynamic_entry(fmt)) {
+               struct hpp_dynamic_entry *hde, *new_hde;
+
+               hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+               new_hde = memdup(hde, sizeof(*hde));
+               if (new_hde)
+                       new_fmt = &new_hde->hpp;
+       } else {
+               new_fmt = memdup(fmt, sizeof(*fmt));
+       }
+
+       INIT_LIST_HEAD(&new_fmt->list);
+       INIT_LIST_HEAD(&new_fmt->sort_list);
+
+       return new_fmt;
+}
+
 static int parse_field_name(char *str, char **event, char **field, char **opt)
 {
        char *event_name, *field_name, *opt_name;
@@ -1908,11 +2012,11 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam
 
 static int __dynamic_dimension__add(struct perf_evsel *evsel,
                                    struct format_field *field,
-                                   bool raw_trace)
+                                   bool raw_trace, int level)
 {
        struct hpp_dynamic_entry *hde;
 
-       hde = __alloc_dynamic_entry(evsel, field);
+       hde = __alloc_dynamic_entry(evsel, field, level);
        if (hde == NULL)
                return -ENOMEM;
 
@@ -1922,14 +2026,14 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel,
        return 0;
 }
 
-static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
 {
        int ret;
        struct format_field *field;
 
        field = evsel->tp_format->format.fields;
        while (field) {
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
                if (ret < 0)
                        return ret;
 
@@ -1938,7 +2042,8 @@ static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace)
        return 0;
 }
 
-static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+                                 int level)
 {
        int ret;
        struct perf_evsel *evsel;
@@ -1947,7 +2052,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
                if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;
 
-               ret = add_evsel_fields(evsel, raw_trace);
+               ret = add_evsel_fields(evsel, raw_trace, level);
                if (ret < 0)
                        return ret;
        }
@@ -1955,7 +2060,7 @@ static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace)
 }
 
 static int add_all_matching_fields(struct perf_evlist *evlist,
-                                  char *field_name, bool raw_trace)
+                                  char *field_name, bool raw_trace, int level)
 {
        int ret = -ESRCH;
        struct perf_evsel *evsel;
@@ -1969,14 +2074,15 @@ static int add_all_matching_fields(struct perf_evlist *evlist,
                if (field == NULL)
                        continue;
 
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
                if (ret < 0)
                        break;
        }
        return ret;
 }
 
-static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+                            int level)
 {
        char *str, *event_name, *field_name, *opt_name;
        struct perf_evsel *evsel;
@@ -2006,12 +2112,12 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
        }
 
        if (!strcmp(field_name, "trace_fields")) {
-               ret = add_all_dynamic_fields(evlist, raw_trace);
+               ret = add_all_dynamic_fields(evlist, raw_trace, level);
                goto out;
        }
 
        if (event_name == NULL) {
-               ret = add_all_matching_fields(evlist, field_name, raw_trace);
+               ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
                goto out;
        }
 
@@ -2029,7 +2135,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
        }
 
        if (!strcmp(field_name, "*")) {
-               ret = add_evsel_fields(evsel, raw_trace);
+               ret = add_evsel_fields(evsel, raw_trace, level);
        } else {
                field = pevent_find_any_field(evsel->tp_format, field_name);
                if (field == NULL) {
@@ -2038,7 +2144,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok)
                        return -ENOENT;
                }
 
-               ret = __dynamic_dimension__add(evsel, field, raw_trace);
+               ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
        }
 
 out:
@@ -2046,12 +2152,14 @@ out:
        return ret;
 }
 
-static int __sort_dimension__add(struct sort_dimension *sd)
+static int __sort_dimension__add(struct sort_dimension *sd,
+                                struct perf_hpp_list *list,
+                                int level)
 {
        if (sd->taken)
                return 0;
 
-       if (__sort_dimension__add_hpp_sort(sd) < 0)
+       if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
                return -1;
 
        if (sd->entry->se_collapse)
@@ -2062,46 +2170,63 @@ static int __sort_dimension__add(struct sort_dimension *sd)
        return 0;
 }
 
-static int __hpp_dimension__add(struct hpp_dimension *hd)
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+                               struct perf_hpp_list *list,
+                               int level)
 {
-       if (!hd->taken) {
-               hd->taken = 1;
+       struct perf_hpp_fmt *fmt;
 
-               perf_hpp__register_sort_field(hd->fmt);
-       }
+       if (hd->taken)
+               return 0;
+
+       fmt = __hpp_dimension__alloc_hpp(hd, level);
+       if (!fmt)
+               return -1;
+
+       hd->taken = 1;
+       perf_hpp_list__register_sort_field(list, fmt);
        return 0;
 }
 
-static int __sort_dimension__add_output(struct sort_dimension *sd)
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+                                       struct sort_dimension *sd)
 {
        if (sd->taken)
                return 0;
 
-       if (__sort_dimension__add_hpp_output(sd) < 0)
+       if (__sort_dimension__add_hpp_output(sd, list) < 0)
                return -1;
 
        sd->taken = 1;
        return 0;
 }
 
-static int __hpp_dimension__add_output(struct hpp_dimension *hd)
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+                                      struct hpp_dimension *hd)
 {
-       if (!hd->taken) {
-               hd->taken = 1;
+       struct perf_hpp_fmt *fmt;
 
-               perf_hpp__column_register(hd->fmt);
-       }
+       if (hd->taken)
+               return 0;
+
+       fmt = __hpp_dimension__alloc_hpp(hd, 0);
+       if (!fmt)
+               return -1;
+
+       hd->taken = 1;
+       perf_hpp_list__column_register(list, fmt);
        return 0;
 }
 
 int hpp_dimension__add_output(unsigned col)
 {
        BUG_ON(col >= PERF_HPP__MAX_INDEX);
-       return __hpp_dimension__add_output(&hpp_sort_dimensions[col]);
+       return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(const char *tok,
-                              struct perf_evlist *evlist __maybe_unused)
+static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+                              struct perf_evlist *evlist __maybe_unused,
+                              int level)
 {
        unsigned int i;
 
@@ -2136,9 +2261,13 @@ static int sort_dimension__add(const char *tok,
                        sort__has_dso = 1;
                } else if (sd->entry == &sort_socket) {
                        sort__has_socket = 1;
+               } else if (sd->entry == &sort_thread) {
+                       sort__has_thread = 1;
+               } else if (sd->entry == &sort_comm) {
+                       sort__has_comm = 1;
                }
 
-               return __sort_dimension__add(sd);
+               return __sort_dimension__add(sd, list, level);
        }
 
        for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2147,7 +2276,7 @@ static int sort_dimension__add(const char *tok,
                if (strncasecmp(tok, hd->name, strlen(tok)))
                        continue;
 
-               return __hpp_dimension__add(hd);
+               return __hpp_dimension__add(hd, list, level);
        }
 
        for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2162,7 +2291,7 @@ static int sort_dimension__add(const char *tok,
                if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
                        sort__has_sym = 1;
 
-               __sort_dimension__add(sd);
+               __sort_dimension__add(sd, list, level);
                return 0;
        }
 
@@ -2178,16 +2307,60 @@ static int sort_dimension__add(const char *tok,
                if (sd->entry == &sort_mem_daddr_sym)
                        sort__has_sym = 1;
 
-               __sort_dimension__add(sd);
+               __sort_dimension__add(sd, list, level);
                return 0;
        }
 
-       if (!add_dynamic_entry(evlist, tok))
+       if (!add_dynamic_entry(evlist, tok, level))
                return 0;
 
        return -ESRCH;
 }
 
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+                          struct perf_evlist *evlist)
+{
+       char *tmp, *tok;
+       int ret = 0;
+       int level = 0;
+       int next_level = 1;
+       bool in_group = false;
+
+       do {
+               tok = str;
+               tmp = strpbrk(str, "{}, ");
+               if (tmp) {
+                       if (in_group)
+                               next_level = level;
+                       else
+                               next_level = level + 1;
+
+                       if (*tmp == '{')
+                               in_group = true;
+                       else if (*tmp == '}')
+                               in_group = false;
+
+                       *tmp = '\0';
+                       str = tmp + 1;
+               }
+
+               if (*tok) {
+                       ret = sort_dimension__add(list, tok, evlist, level);
+                       if (ret == -EINVAL) {
+                               error("Invalid --sort key: `%s'", tok);
+                               break;
+                       } else if (ret == -ESRCH) {
+                               error("Unknown --sort key: `%s'", tok);
+                               break;
+                       }
+               }
+
+               level = next_level;
+       } while (tmp);
+
+       return ret;
+}
+
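
setup_sort_list() replaces the strtok_r() loop formerly in __setup_sorting(): it walks the --sort string with strpbrk(), still treats ',' and ' ' as separators, and additionally uses '{' and '}' so that keys inside braces share one hierarchy level while keys outside each get a distinct one. A minimal standalone sketch of that level assignment (the sample key string and the printf() output are illustrative, not perf code):

/*
 * For "comm,{dso,sym},cpu" this prints "comm:0 dso:2 sym:2 cpu:3" --
 * only equality of levels matters for grouping.
 */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static void show_sort_levels(char *str)
{
        int level = 0, next_level = 1;
        bool in_group = false;
        char *tmp, *tok;

        do {
                tok = str;
                tmp = strpbrk(str, "{}, ");
                if (tmp) {
                        next_level = in_group ? level : level + 1;

                        if (*tmp == '{')
                                in_group = true;
                        else if (*tmp == '}')
                                in_group = false;

                        *tmp = '\0';
                        str = tmp + 1;
                }

                if (*tok)
                        printf("%s:%d ", tok, level);

                level = next_level;
        } while (tmp);
        putchar('\n');
}

int main(void)
{
        char keys[] = "comm,{dso,sym},cpu";    /* must be writable */

        show_sort_levels(keys);
        return 0;
}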
 static const char *get_default_sort_order(struct perf_evlist *evlist)
 {
        const char *default_sort_orders[] = {
@@ -2282,7 +2455,7 @@ static char *setup_overhead(char *keys)
 
 static int __setup_sorting(struct perf_evlist *evlist)
 {
-       char *tmp, *tok, *str;
+       char *str;
        const char *sort_keys;
        int ret = 0;
 
@@ -2320,17 +2493,7 @@ static int __setup_sorting(struct perf_evlist *evlist)
                }
        }
 
-       for (tok = strtok_r(str, ", ", &tmp);
-                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
-               ret = sort_dimension__add(tok, evlist);
-               if (ret == -EINVAL) {
-                       error("Invalid --sort key: `%s'", tok);
-                       break;
-               } else if (ret == -ESRCH) {
-                       error("Unknown --sort key: `%s'", tok);
-                       break;
-               }
-       }
+       ret = setup_sort_list(&perf_hpp_list, str, evlist);
 
        free(str);
        return ret;
@@ -2341,7 +2504,7 @@ void perf_hpp__set_elide(int idx, bool elide)
        struct perf_hpp_fmt *fmt;
        struct hpp_sort_entry *hse;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2401,7 +2564,7 @@ void sort__setup_elide(FILE *output)
        struct perf_hpp_fmt *fmt;
        struct hpp_sort_entry *hse;
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2413,7 +2576,7 @@ void sort__setup_elide(FILE *output)
         * It makes no sense to elide all of sort entries.
         * Just revert them to show up again.
         */
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2421,7 +2584,7 @@ void sort__setup_elide(FILE *output)
                        return;
        }
 
-       perf_hpp__for_each_format(fmt) {
+       perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
                if (!perf_hpp__is_sort_entry(fmt))
                        continue;
 
@@ -2429,7 +2592,7 @@ void sort__setup_elide(FILE *output)
        }
 }
 
-static int output_field_add(char *tok)
+static int output_field_add(struct perf_hpp_list *list, char *tok)
 {
        unsigned int i;
 
@@ -2439,7 +2602,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
@@ -2448,7 +2611,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, hd->name, strlen(tok)))
                        continue;
 
-               return __hpp_dimension__add_output(hd);
+               return __hpp_dimension__add_output(list, hd);
        }
 
        for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -2457,7 +2620,7 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
@@ -2466,12 +2629,32 @@ static int output_field_add(char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               return __sort_dimension__add_output(sd);
+               return __sort_dimension__add_output(list, sd);
        }
 
        return -ESRCH;
 }
 
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+       char *tmp, *tok;
+       int ret = 0;
+
+       for (tok = strtok_r(str, ", ", &tmp);
+                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
+               ret = output_field_add(list, tok);
+               if (ret == -EINVAL) {
+                       error("Invalid --fields key: `%s'", tok);
+                       break;
+               } else if (ret == -ESRCH) {
+                       error("Unknown --fields key: `%s'", tok);
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 static void reset_dimensions(void)
 {
        unsigned int i;
@@ -2496,7 +2679,7 @@ bool is_strict_order(const char *order)
 
 static int __setup_output_field(void)
 {
-       char *tmp, *tok, *str, *strp;
+       char *str, *strp;
        int ret = -EINVAL;
 
        if (field_order == NULL)
@@ -2516,17 +2699,7 @@ static int __setup_output_field(void)
                goto out;
        }
 
-       for (tok = strtok_r(strp, ", ", &tmp);
-                       tok; tok = strtok_r(NULL, ", ", &tmp)) {
-               ret = output_field_add(tok);
-               if (ret == -EINVAL) {
-                       error("Invalid --fields key: `%s'", tok);
-                       break;
-               } else if (ret == -ESRCH) {
-                       error("Unknown --fields key: `%s'", tok);
-                       break;
-               }
-       }
+       ret = setup_output_list(&perf_hpp_list, strp);
 
 out:
        free(str);
@@ -2542,7 +2715,7 @@ int setup_sorting(struct perf_evlist *evlist)
                return err;
 
        if (parent_pattern != default_parent_pattern) {
-               err = sort_dimension__add("parent", evlist);
+               err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
                if (err < 0)
                        return err;
        }
@@ -2560,9 +2733,13 @@ int setup_sorting(struct perf_evlist *evlist)
                return err;
 
        /* copy sort keys to output fields */
-       perf_hpp__setup_output_field();
+       perf_hpp__setup_output_field(&perf_hpp_list);
        /* and then copy output fields to sort keys */
-       perf_hpp__append_sort_keys();
+       perf_hpp__append_sort_keys(&perf_hpp_list);
+
+       /* setup hists-specific output fields */
+       if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+               return -1;
 
        return 0;
 }
@@ -2578,5 +2755,5 @@ void reset_output_field(void)
        sort_order = NULL;
 
        reset_dimensions();
-       perf_hpp__reset_output_field();
+       perf_hpp__reset_output_field(&perf_hpp_list);
 }
index 687bbb1244281ba65a99c3cb78f5bcb8a1eaa41b..3f4e359981192ac50b56e770aa472749666e1f98 100644 (file)
@@ -32,9 +32,12 @@ extern const char default_sort_order[];
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
 extern int sort__need_collapse;
+extern int sort__has_dso;
 extern int sort__has_parent;
 extern int sort__has_sym;
 extern int sort__has_socket;
+extern int sort__has_thread;
+extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
@@ -94,9 +97,11 @@ struct hist_entry {
        s32                     socket;
        s32                     cpu;
        u8                      cpumode;
+       u8                      depth;
 
        /* We are added by hists__add_dummy_entry. */
        bool                    dummy;
+       bool                    leaf;
 
        char                    level;
        u8                      filtered;
@@ -113,18 +118,28 @@ struct hist_entry {
                        bool    init_have_children;
                        bool    unfolded;
                        bool    has_children;
+                       bool    has_no_entry;
                };
        };
        char                    *srcline;
        char                    *srcfile;
        struct symbol           *parent;
-       struct rb_root          sorted_chain;
        struct branch_info      *branch_info;
        struct hists            *hists;
        struct mem_info         *mem_info;
        void                    *raw_data;
        u32                     raw_size;
        void                    *trace_output;
+       struct perf_hpp_list    *hpp_list;
+       struct hist_entry       *parent_he;
+       union {
+               /* this is for hierarchical entry structure */
+               struct {
+                       struct rb_root  hroot_in;
+                       struct rb_root  hroot_out;
+               };                              /* non-leaf entries */
+               struct rb_root  sorted_chain;   /* leaf entry has callchains */
+       };
        struct callchain_root   callchain[0]; /* must be last member */
 };
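
The union added to struct hist_entry keeps hierarchy mode cheap: an entry is either an inner node whose children hang off hroot_in/hroot_out, or a leaf that still owns a callchain tree in sorted_chain, never both at once. A tiny standalone illustration of the overlapping layout with stand-in types (none of this is perf code):

#include <stdio.h>
#include <stdbool.h>

struct rb_root_stub { void *rb_node; };

struct entry {
        bool leaf;
        union {
                struct {                                /* non-leaf entries */
                        struct rb_root_stub hroot_in;
                        struct rb_root_stub hroot_out;
                };
                struct rb_root_stub sorted_chain;       /* leaf: callchains */
        };
};

int main(void)
{
        struct entry e = { .leaf = true };

        /* sorted_chain overlays hroot_in, so one rb_root is saved per entry */
        printf("rb_root saved per entry: %zu bytes\n",
               sizeof(struct rb_root_stub));
        printf("entry is a %s\n", e.leaf ? "leaf" : "non-leaf");
        return 0;
}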
 
@@ -160,6 +175,17 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he)
        return period * 100.0 / total_period;
 }
 
+static inline u64 cl_address(u64 address)
+{
+       /* return the cacheline of the address */
+       return (address & ~(cacheline_size - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+       /* return the offset of the address within its cacheline */
+       return (address & (cacheline_size - 1));
+}
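
Both helpers assume cacheline_size is a power of two (perf reads it via sysconf), so masking with cacheline_size - 1 cleanly splits an address into its line base and the offset inside the line. A quick standalone check, assuming a 64-byte line:

#include <stdio.h>
#include <stdint.h>

static const uint64_t cacheline_size = 64;      /* assumed; perf queries sysconf */

static uint64_t cl_address(uint64_t address)
{
        /* cacheline base of the address */
        return address & ~(cacheline_size - 1);
}

static uint64_t cl_offset(uint64_t address)
{
        /* offset of the address inside its cacheline */
        return address & (cacheline_size - 1);
}

int main(void)
{
        uint64_t addr = 0x1234;

        /* prints "line 0x1200 offset 0x34" for a 64-byte line */
        printf("line 0x%llx offset 0x%llx\n",
               (unsigned long long)cl_address(addr),
               (unsigned long long)cl_offset(addr));
        return 0;
}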
 
 enum sort_mode {
        SORT_MODE__NORMAL,
@@ -221,6 +247,7 @@ struct sort_entry {
        int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
        int     (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
                               unsigned int width);
+       int     (*se_filter)(struct hist_entry *he, int type, const void *arg);
        u8      se_width_idx;
 };
 
index 6ac03146889d29be60707efd6e04c27c919f64de..b33ffb2af2cf5900378fb9fb8ef60e9eddaaf9db 100644 (file)
@@ -2,6 +2,7 @@
 #include "evsel.h"
 #include "stat.h"
 #include "color.h"
+#include "pmu.h"
 
 enum {
        CTX_BIT_USER    = 1 << 0,
@@ -14,6 +15,13 @@ enum {
 
 #define NUM_CTX CTX_BIT_MAX
 
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
@@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static bool have_frontend_stalled;
 
 struct stats walltime_nsecs_stats;
 
+void perf_stat__init_shadow_stats(void)
+{
+       have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+}
+
 static int evsel_context(struct perf_evsel *evsel)
 {
        int ctx = 0;
@@ -137,9 +151,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
        return color;
 }
 
-static void print_stalled_cycles_frontend(FILE *out, int cpu,
+static void print_stalled_cycles_frontend(int cpu,
                                          struct perf_evsel *evsel
-                                         __maybe_unused, double avg)
+                                         __maybe_unused, double avg,
+                                         struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -152,14 +167,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " frontend cycles idle   ");
+       if (ratio)
+               out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+                                 ratio);
+       else
+               out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
 }
 
-static void print_stalled_cycles_backend(FILE *out, int cpu,
+static void print_stalled_cycles_backend(int cpu,
                                         struct perf_evsel *evsel
-                                        __maybe_unused, double avg)
+                                        __maybe_unused, double avg,
+                                        struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -172,14 +190,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " backend  cycles idle   ");
+       out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
 }
 
-static void print_branch_misses(FILE *out, int cpu,
+static void print_branch_misses(int cpu,
                                struct perf_evsel *evsel __maybe_unused,
-                               double avg)
+                               double avg,
+                               struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -192,14 +209,13 @@ static void print_branch_misses(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all branches        ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
 }
 
-static void print_l1_dcache_misses(FILE *out, int cpu,
+static void print_l1_dcache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
+                                  double avg,
+                                  struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -212,14 +228,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu,
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all L1-dcache hits  ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
 }
 
-static void print_l1_icache_misses(FILE *out, int cpu,
+static void print_l1_icache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
-                                  double avg)
+                                  double avg,
+                                  struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -231,15 +246,13 @@ static void print_l1_icache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all L1-icache hits  ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
 }
 
-static void print_dtlb_cache_misses(FILE *out, int cpu,
+static void print_dtlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
+                                   double avg,
+                                   struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -251,15 +264,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all dTLB cache hits ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
 }
 
-static void print_itlb_cache_misses(FILE *out, int cpu,
+static void print_itlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
-                                   double avg)
+                                   double avg,
+                                   struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -271,15 +282,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all iTLB cache hits ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
 }
 
-static void print_ll_cache_misses(FILE *out, int cpu,
+static void print_ll_cache_misses(int cpu,
                                  struct perf_evsel *evsel __maybe_unused,
-                                 double avg)
+                                 double avg,
+                                 struct perf_stat_output_ctx *out)
 {
        double total, ratio = 0.0;
        const char *color;
@@ -291,15 +300,15 @@ static void print_ll_cache_misses(FILE *out, int cpu,
                ratio = avg / total * 100.0;
 
        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-       fprintf(out, " #  ");
-       color_fprintf(out, color, "%6.2f%%", ratio);
-       fprintf(out, " of all LL-cache hits   ");
+       out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
 }
 
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-                                  double avg, int cpu, enum aggr_mode aggr)
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+                                  double avg, int cpu,
+                                  struct perf_stat_output_ctx *out)
 {
+       void *ctxp = out->ctx;
+       print_metric_t print_metric = out->print_metric;
        double total, ratio = 0.0, total2;
        int ctx = evsel_context(evsel);
 
@@ -307,119 +316,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total) {
                        ratio = avg / total;
-                       fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+                       print_metric(ctxp, NULL, "%7.2f ",
+                                       "insn per cycle", ratio);
                } else {
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
                }
                total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
                if (total && avg) {
+                       out->new_line(ctxp);
                        ratio = total / avg;
-                       fprintf(out, "\n");
-                       if (aggr == AGGR_NONE)
-                               fprintf(out, "        ");
-                       fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+                       print_metric(ctxp, NULL, "%7.2f ",
+                                       "stalled cycles per insn",
+                                       ratio);
+               } else if (have_frontend_stalled) {
+                       print_metric(ctxp, NULL, NULL,
+                                    "stalled cycles per insn", 0);
                }
-
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-                       runtime_branches_stats[ctx][cpu].n != 0) {
-               print_branch_misses(out, cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+               if (runtime_branches_stats[ctx][cpu].n != 0)
+                       print_branch_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all branches", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-               print_l1_dcache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
+                       print_l1_dcache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_l1_icache_stats[ctx][cpu].n != 0) {
-               print_l1_icache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_l1_icache_stats[ctx][cpu].n != 0)
+                       print_l1_icache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-               print_dtlb_cache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
+                       print_dtlb_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-               print_itlb_cache_misses(out, cpu, evsel, avg);
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
+                       print_itlb_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-                       runtime_ll_cache_stats[ctx][cpu].n != 0) {
-               print_ll_cache_misses(out, cpu, evsel, avg);
-       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-                       runtime_cacherefs_stats[ctx][cpu].n != 0) {
+                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+               if (runtime_ll_cache_stats[ctx][cpu].n != 0)
+                       print_ll_cache_misses(cpu, evsel, avg, out);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
                total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
-               fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
-
+               if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+                       print_metric(ctxp, NULL, "%8.3f %%",
+                                    "of all cache refs", ratio);
+               else
+                       print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-               print_stalled_cycles_frontend(out, cpu, evsel, avg);
+               print_stalled_cycles_frontend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-               print_stalled_cycles_backend(out, cpu, evsel, avg);
+               print_stalled_cycles_backend(cpu, evsel, avg, out);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total) {
                        ratio = avg / total;
-                       fprintf(out, " # %8.3f GHz                    ", ratio);
+                       print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
                } else {
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "Ghz", 0);
                }
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                if (total)
-                       fprintf(out,
-                               " #   %5.2f%% transactional cycles   ",
-                               100.0 * (avg / total));
+                       print_metric(ctxp, NULL,
+                                       "%7.2f%%", "transactional cycles",
+                                       100.0 * (avg / total));
+               else
+                       print_metric(ctxp, NULL, NULL, "transactional cycles",
+                                    0);
        } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
                total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
                total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
                if (total2 < avg)
                        total2 = avg;
                if (total)
-                       fprintf(out,
-                               " #   %5.2f%% aborted cycles         ",
+                       print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
                                100.0 * ((total2-avg) / total));
-       } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
-                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               else
+                       print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+       } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
                if (avg)
                        ratio = total / avg;
 
-               fprintf(out, " # %8.0f cycles / transaction   ", ratio);
-       } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
-                  runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+               if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+                       print_metric(ctxp, NULL, "%8.0f",
+                                    "cycles / transaction", ratio);
+               else
+                       print_metric(ctxp, NULL, NULL, "cycles / transaction",
+                                    0);
+       } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
                total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
                if (avg)
                        ratio = total / avg;
 
-               fprintf(out, " # %8.0f cycles / elision       ", ratio);
+               print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
        } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
                if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
-                       fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
+                       print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+                                    avg / ratio);
                else
-                       fprintf(out, "                                   ");
+                       print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';
+               char unit_buf[10];
 
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
@@ -429,9 +464,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
                        ratio *= 1000;
                        unit = 'K';
                }
-
-               fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+               snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+               print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
        } else {
-               fprintf(out, "                                   ");
+               print_metric(ctxp, NULL, NULL, NULL, 0);
        }
 }
index afb0c45eba34ba8db7a6cb6d258326f545f7aca1..4d9b481cf3b6edbb7d6161cbd238709241dedc5b 100644 (file)
@@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
        }
 }
 
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 {
        int i;
        struct perf_stat_evsel *ps = evsel->priv;
@@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
        perf_stat_evsel_id_init(evsel);
 }
 
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 {
        evsel->priv = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->priv == NULL)
@@ -117,13 +117,13 @@ int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
        return 0;
 }
 
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 {
        zfree(&evsel->priv);
 }
 
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-                                     int ncpus, int nthreads)
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+                                            int ncpus, int nthreads)
 {
        struct perf_counts *counts;
 
@@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
        return counts ? 0 : -ENOMEM;
 }
 
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
 }
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
 {
        int ncpus = perf_evsel__nr_cpus(evsel);
        int nthreads = thread_map__nr(evsel->threads);
index 086f4e128d6351f0e0de862c2ebcc44801cbc2f8..0150e786ccc7c1f48e407323b606f47155a076c9 100644 (file)
@@ -68,21 +68,23 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel);
 
 extern struct stats walltime_nsecs_stats;
 
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *fmt,
+                              const char *unit, double val);
+typedef void (*new_line_t)(void *ctx);
+
+void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
                                    int cpu);
-void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-                                  double avg, int cpu, enum aggr_mode aggr);
-
-void perf_evsel__reset_stat_priv(struct perf_evsel *evsel);
-int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel);
-void perf_evsel__free_stat_priv(struct perf_evsel *evsel);
-
-int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
-                                     int ncpus, int nthreads);
-void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel);
+struct perf_stat_output_ctx {
+       void *ctx;
+       print_metric_t print_metric;
+       new_line_t new_line;
+};
 
-int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw);
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+                                  double avg, int cpu,
+                                  struct perf_stat_output_ctx *out);
 
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct perf_evlist *evlist);
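
With perf_stat_output_ctx, the shadow-stat code no longer prints to a FILE * directly; whoever calls perf_stat__print_shadow_stats() supplies the print_metric/new_line callbacks and decides how a metric is rendered (stdout, CSV, and so on). A minimal caller-side sketch, assuming the convention visible at the call sites above (third argument is a printf format for the value, fourth is the metric label); the stdout printer here is hypothetical, not perf's implementation:

#include <stdio.h>

typedef void (*print_metric_t)(void *ctx, const char *color, const char *fmt,
                               const char *unit, double val);
typedef void (*new_line_t)(void *ctx);

struct perf_stat_output_ctx {
        void *ctx;
        print_metric_t print_metric;
        new_line_t new_line;
};

static void stdout_print_metric(void *ctx, const char *color, const char *fmt,
                                const char *unit, double val)
{
        FILE *out = ctx;

        (void)color;                    /* a real printer would colorize here */
        if (!fmt) {                     /* NULL fmt: no value, leave the slot empty */
                fprintf(out, " %-30s", unit ? unit : "");
                return;
        }
        fprintf(out, " #  ");
        fprintf(out, fmt, val);
        fprintf(out, " %s", unit);
}

static void stdout_new_line(void *ctx)
{
        fputc('\n', (FILE *)ctx);
}

int main(void)
{
        struct perf_stat_output_ctx out = {
                .ctx            = stdout,
                .print_metric   = stdout_print_metric,
                .new_line       = stdout_new_line,
        };

        /* same shape as the calls in perf_stat__print_shadow_stats() */
        out.print_metric(out.ctx, NULL, "%7.2f ", "insn per cycle", 1.23);
        out.new_line(out.ctx);
        return 0;
}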
index 25671fa166188413c66758a978082e89a1ba08d2..d3d279275432ee663641a1a6dbb0a0cf8d3b8334 100644 (file)
@@ -51,30 +51,6 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
        ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
 }
 
-static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
-                                  const void *data, size_t dlen)
-{
-       if (pos + len < pos)
-               die("you want to use way too much memory");
-       if (pos > sb->len)
-               die("`pos' is too far after the end of the buffer");
-       if (pos + len > sb->len)
-               die("`pos + len' is too far after the end of the buffer");
-
-       if (dlen >= len)
-               strbuf_grow(sb, dlen - len);
-       memmove(sb->buf + pos + dlen,
-                       sb->buf + pos + len,
-                       sb->len - pos - len);
-       memcpy(sb->buf + pos, data, dlen);
-       strbuf_setlen(sb, sb->len + dlen - len);
-}
-
-void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
-{
-       strbuf_splice(sb, pos, len, NULL, 0);
-}
-
 void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
        strbuf_grow(sb, len);
index 529f2f03524915ab9cae7c5608de444fd875812d..7a32c838884d8de6feaa3223e388199c5c939301 100644 (file)
@@ -77,8 +77,6 @@ static inline void strbuf_addch(struct strbuf *sb, int c) {
        sb->buf[sb->len] = '\0';
 }
 
-extern void strbuf_remove(struct strbuf *, size_t pos, size_t len);
-
 extern void strbuf_add(struct strbuf *, const void *, size_t);
 static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
        strbuf_add(sb, s, strlen(s));
index 562b8ebeae5b2414b6bac867c928cb22ee9a5f13..b1dd68f358fcd8e7390b5929ed736a357c7853c1 100644 (file)
@@ -6,6 +6,7 @@
 #include <inttypes.h>
 
 #include "symbol.h"
+#include "demangle-java.h"
 #include "machine.h"
 #include "vdso.h"
 #include <symbol/kallsyms.h>
@@ -1077,6 +1078,8 @@ new_symbol:
                                demangle_flags = DMGL_PARAMS | DMGL_ANSI;
 
                        demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+                       if (demangled == NULL)
+                               demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
                        if (demangled != NULL)
                                elf_name = demangled;
                }
index ab02209a7cf3b162431bfc006287f5fbd8c68f43..e7588dc915181729394c1d2195c78576c47d20df 100644 (file)
@@ -1466,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         * Read the build id if possible. This is required for
         * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
         */
-       if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
+       if (is_regular_file(name) &&
+           filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0)
                dso__set_build_id(dso, build_id);
 
        /*
@@ -1487,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
                                                   root_dir, name, PATH_MAX))
                        continue;
 
+               if (!is_regular_file(name))
+                       continue;
+
                /* Name is now the name of the next image to try */
                if (symsrc__init(ss, dso, name, symtab_type) < 0)
                        continue;
@@ -1525,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
        if (!runtime_ss && syms_ss)
                runtime_ss = syms_ss;
 
+       if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
+               if (dso__build_id_is_kmod(dso, name, PATH_MAX))
+                       kmod = true;
+
        if (syms_ss)
                ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
        else
index ccd1caa40e116645be37025665388886d7c97546..a937053a0ae07a6214fb03753faa66819ed40884 100644 (file)
@@ -110,7 +110,8 @@ struct symbol_conf {
                        has_filter,
                        show_ref_callgraph,
                        hide_unresolved,
-                       raw_trace;
+                       raw_trace,
+                       report_hierarchy;
        const char      *vmlinux_name,
                        *kallsyms_name,
                        *source_prefix,
index 802bb868d446cafa7c5383982193ad13d87b785a..8ae051e0ec79090e674fdf9a6cc9fbece8275ebb 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
 #include "trace-event.h"
 #include "machine.h"
 #include "util.h"
index 4d4210d4e13d1d7fbc32a5c1f4afff58a0810fd0..1b741646eed00b7754ba0a618b4fb57b8c87d9ac 100644 (file)
@@ -19,7 +19,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
        u64 quot, rem;
 
        quot = cyc >> tc->time_shift;
-       rem  = cyc & ((1 << tc->time_shift) - 1);
+       rem  = cyc & (((u64)1 << tc->time_shift) - 1);
        return tc->time_zero + quot * tc->time_mult +
               ((rem * tc->time_mult) >> tc->time_shift);
 }
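
The cast matters because 1 << time_shift is otherwise evaluated in 32-bit int: the intermediate overflows once time_shift reaches 31, so the remainder mask, and with it the low bits of the converted time, goes wrong for large shift values. A standalone sketch of the same quotient/remainder conversion with made-up parameters (the struct and the mult/shift values are illustrative, not the perf_tsc_conversion ABI):

#include <stdio.h>
#include <stdint.h>

struct tsc_conv {
        uint16_t time_shift;
        uint32_t time_mult;
        uint64_t time_zero;
};

static uint64_t tsc_to_ns(uint64_t cyc, const struct tsc_conv *tc)
{
        /* split to keep rem * time_mult within 64 bits */
        uint64_t quot = cyc >> tc->time_shift;
        uint64_t rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);

        return tc->time_zero + quot * tc->time_mult +
               ((rem * tc->time_mult) >> tc->time_shift);
}

int main(void)
{
        /* ~3 GHz TSC: mult/2^shift is roughly 1/3 ns per cycle */
        struct tsc_conv tc = { .time_shift = 31, .time_mult = 715827883 };

        printf("%llu ns\n", (unsigned long long)tsc_to_ns(3000000000ULL, &tc));
        return 0;
}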
index ead9509835d23e2aee29b9f3548613973a5937fb..b7766c577b015d978fd3e9960c451692f81daa6e 100644 (file)
@@ -14,6 +14,7 @@
 #include <limits.h>
 #include <byteswap.h>
 #include <linux/kernel.h>
+#include <linux/log2.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
@@ -507,54 +508,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
        return ret;
 }
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep)
-{
-       size_t size = 0, alloc_size = 0;
-       void *bf = NULL, *nbf;
-       int fd, n, err = 0;
-       char sbuf[STRERR_BUFSIZE];
-
-       fd = open(filename, O_RDONLY);
-       if (fd < 0)
-               return -errno;
-
-       do {
-               if (size == alloc_size) {
-                       alloc_size += BUFSIZ;
-                       nbf = realloc(bf, alloc_size);
-                       if (!nbf) {
-                               err = -ENOMEM;
-                               break;
-                       }
-
-                       bf = nbf;
-               }
-
-               n = read(fd, bf + size, alloc_size - size);
-               if (n < 0) {
-                       if (size) {
-                               pr_warning("read failed %d: %s\n", errno,
-                                        strerror_r(errno, sbuf, sizeof(sbuf)));
-                               err = 0;
-                       } else
-                               err = -errno;
-
-                       break;
-               }
-
-               size += n;
-       } while (n > 0);
-
-       if (!err) {
-               *sizep = size;
-               *buf   = bf;
-       } else
-               free(bf);
-
-       close(fd);
-       return err;
-}
-
 const char *get_filename_for_perf_kvm(void)
 {
        const char *filename;
@@ -691,3 +644,66 @@ out:
 
        return tip;
 }
+
+bool is_regular_file(const char *file)
+{
+       struct stat st;
+
+       if (stat(file, &st))
+               return false;
+
+       return S_ISREG(st.st_mode);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+       struct timeval tv;
+       struct tm tm;
+       char dt[32];
+
+       if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+               return -1;
+
+       if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+               return -1;
+
+       scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+       return 0;
+}
+
+void print_binary(unsigned char *data, size_t len,
+                 size_t bytes_per_line, print_binary_t printer,
+                 void *extra)
+{
+       size_t i, j, mask;
+
+       if (!printer)
+               return;
+
+       bytes_per_line = roundup_pow_of_two(bytes_per_line);
+       mask = bytes_per_line - 1;
+
+       printer(BINARY_PRINT_DATA_BEGIN, 0, extra);
+       for (i = 0; i < len; i++) {
+               if ((i & mask) == 0) {
+                       printer(BINARY_PRINT_LINE_BEGIN, -1, extra);
+                       printer(BINARY_PRINT_ADDR, i, extra);
+               }
+
+               printer(BINARY_PRINT_NUM_DATA, data[i], extra);
+
+               if (((i & mask) == mask) || i == len - 1) {
+                       for (j = 0; j < mask-(i & mask); j++)
+                               printer(BINARY_PRINT_NUM_PAD, -1, extra);
+
+                       printer(BINARY_PRINT_SEP, i, extra);
+                       for (j = i & ~mask; j <= i; j++)
+                               printer(BINARY_PRINT_CHAR_DATA, data[j], extra);
+                       for (j = 0; j < mask-(i & mask); j++)
+                               printer(BINARY_PRINT_CHAR_PAD, i, extra);
+                       printer(BINARY_PRINT_LINE_END, -1, extra);
+               }
+       }
+       printer(BINARY_PRINT_DATA_END, -1, extra);
+}
index fe915e616f9b65388e15be963d0ef5cf58c325fd..d0d50cef8b2af31703c7d298f68d93b68326c3c5 100644 (file)
@@ -82,6 +82,8 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
 extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
@@ -303,7 +305,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
                  bool show_sym, bool unwind_inlines);
 void free_srcline(char *srcline);
 
-int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 
 void mem_bswap_64(void *src, int byte_size);
@@ -343,5 +344,27 @@ int fetch_kernel_version(unsigned int *puint,
 #define KVER_PARAM(x)  KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
 
 const char *perf_tip(const char *dirpath);
+bool is_regular_file(const char *file);
+int fetch_current_timestamp(char *buf, size_t sz);
+
+enum binary_printer_ops {
+       BINARY_PRINT_DATA_BEGIN,
+       BINARY_PRINT_LINE_BEGIN,
+       BINARY_PRINT_ADDR,
+       BINARY_PRINT_NUM_DATA,
+       BINARY_PRINT_NUM_PAD,
+       BINARY_PRINT_SEP,
+       BINARY_PRINT_CHAR_DATA,
+       BINARY_PRINT_CHAR_PAD,
+       BINARY_PRINT_LINE_END,
+       BINARY_PRINT_DATA_END,
+};
+
+typedef void (*print_binary_t)(enum binary_printer_ops,
+                              unsigned int val,
+                              void *extra);
 
+void print_binary(unsigned char *data, size_t len,
+                 size_t bytes_per_line, print_binary_t printer,
+                 void *extra);
 #endif /* GIT_COMPAT_UTIL_H */
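
print_binary() itself does no formatting: it only walks the buffer line by line and reports what it sees through the callback, which lets different UIs reuse the same helper. A sketch of a plain hexdump consumer, assuming perf's util.h (with the enum and prototype added above) is on the include path and the object providing print_binary() is linked in; the callback body is illustrative:

#include <stdio.h>
#include <ctype.h>
#include "util.h"       /* enum binary_printer_ops, print_binary() from above */

static void hexdump_printer(enum binary_printer_ops op, unsigned int val,
                            void *extra)
{
        FILE *fp = extra;

        switch (op) {
        case BINARY_PRINT_ADDR:
                fprintf(fp, "%04x:", val);
                break;
        case BINARY_PRINT_NUM_DATA:
                fprintf(fp, " %02x", val);
                break;
        case BINARY_PRINT_NUM_PAD:
                fprintf(fp, "   ");             /* keep columns aligned */
                break;
        case BINARY_PRINT_SEP:
                fprintf(fp, "  |");
                break;
        case BINARY_PRINT_CHAR_DATA:
                fputc(isprint(val) ? val : '.', fp);
                break;
        case BINARY_PRINT_CHAR_PAD:
                fputc(' ', fp);
                break;
        case BINARY_PRINT_LINE_END:
                fprintf(fp, "|\n");
                break;
        default:        /* DATA_BEGIN, LINE_BEGIN, DATA_END: nothing to add */
                break;
        }
}

int main(void)
{
        unsigned char buf[] = "print_binary() callback example";

        print_binary(buf, sizeof(buf), 16, hexdump_printer, stdout);
        return 0;
}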
index 0dac7e05a6ac9e5f1500eca1cebbce2d900ab511..3fa94e291d16a2891b3799797c1a8da289d89b20 100644 (file)
@@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
 }
 
 static void
-dump_cstate_pstate_config_info(family, model)
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
        if (!do_nhm_platform_info)
                return;
@@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
 #define        RAPL_TIME_GRANULARITY   0x3F /* 6 bit time granularity */
 
-double get_tdp(model)
+double get_tdp(unsigned int model)
 {
        unsigned long long msr;
 
@@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        return;
 }
 
-void perf_limit_reasons_probe(family, model)
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 {
        if (!genuine_intel)
                return;
@@ -2792,7 +2792,7 @@ void process_cpuid()
        perf_limit_reasons_probe(family, model);
 
        if (debug)
-               dump_cstate_pstate_config_info();
+               dump_cstate_pstate_config_info(family, model);
 
        if (has_skl_msrs(family, model))
                calculate_tsc_tweak();
index a11cfd20a6a0d2aa86b1d06b8552bc41a8bfd8c9..9102ae172d2a92e06ad6dc9155985b5103703a92 100644 (file)
@@ -1952,6 +1952,9 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
        else
                val *= halt_poll_ns_grow;
 
+       if (val > halt_poll_ns)
+               val = halt_poll_ns;
+
        vcpu->halt_poll_ns = val;
        trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
 }
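
The new clamp keeps vcpu->halt_poll_ns from being multiplied past the halt_poll_ns limit once growth has started. A quick standalone sketch of the growth curve with illustrative numbers (neither the start value, the grow factor, nor the limit is taken from KVM's defaults):

#include <stdio.h>

int main(void)
{
        unsigned int limit = 200000;    /* stands in for the halt_poll_ns param */
        unsigned int grow = 2;          /* stands in for halt_poll_ns_grow */
        unsigned int val = 10000;
        int step;

        for (step = 0; step < 8; step++) {
                val *= grow;
                if (val > limit)
                        val = limit;    /* the clamp added above */
                printf("step %d: %u ns\n", step, val);
        }
        return 0;
}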